diff --git a/.gitattributes b/.gitattributes index 68bbd5250943c306848fa6cb60438474a8d9529c..7853af2f4e5e2cb2c8d8a28cb2f182d63d279dc9 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3596,6 +3596,143 @@ RTCP/CNProc/test/tPencilBeamFormer.sh -text RTCP/CNProc/test/tStokes.cc -text RTCP/CNProc/test/tStokes.sh -text RTCP/CNProc/test/tStokesAsm.cc -text +RTCP/GPUProc/CMakeLists.txt -text +RTCP/GPUProc/OpenCL_FFT/CMakeLists.txt -text +RTCP/GPUProc/OpenCL_FFT/src/AccelerateError.pdf -text +RTCP/GPUProc/OpenCL_FFT/src/CMakeLists.txt -text +RTCP/GPUProc/OpenCL_FFT/src/Error.pdf -text +RTCP/GPUProc/OpenCL_FFT/src/Makefile -text +RTCP/GPUProc/OpenCL_FFT/src/Makefile.not -text +RTCP/GPUProc/OpenCL_FFT/src/OpenCLError.pdf -text svneol=unset#unset +RTCP/GPUProc/OpenCL_FFT/src/OpenCL_FFT.xcodeproj/project.pbxproj -text +RTCP/GPUProc/OpenCL_FFT/src/ReadMe.txt -text +RTCP/GPUProc/OpenCL_FFT/src/clFFT.h -text +RTCP/GPUProc/OpenCL_FFT/src/fft_base_kernels.h -text +RTCP/GPUProc/OpenCL_FFT/src/fft_execute.cpp -text +RTCP/GPUProc/OpenCL_FFT/src/fft_internal.h -text +RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp -text +RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp.orig -text +RTCP/GPUProc/OpenCL_FFT/src/fft_setup.cpp -text +RTCP/GPUProc/OpenCL_FFT/src/libOpenCL_FFT.a.not -text +RTCP/GPUProc/OpenCL_FFT/src/main.cpp -text +RTCP/GPUProc/OpenCL_FFT/src/param.txt -text +RTCP/GPUProc/OpenCL_FFT/src/procs.h -text +RTCP/GPUProc/clAmdFft/appmlEnv.sh -text +RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client -text +RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client-1.8.291 -text +RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client -text +RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client-1.8.291 -text +RTCP/GPUProc/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz -text +RTCP/GPUProc/clAmdFft/clAmdFft-EULA.txt -text +RTCP/GPUProc/clAmdFft/clAmdFft-README.txt -text +RTCP/GPUProc/clAmdFft/clAmdFft1.8.291.tar.gz -text +RTCP/GPUProc/clAmdFft/doc/clAmdFft.refman.pdf -text svneol=unset#unset +RTCP/GPUProc/clAmdFft/include/clAmdFft.h 
-text +RTCP/GPUProc/clAmdFft/include/clAmdFft.version.h -text +RTCP/GPUProc/clAmdFft/install-clAmdFft-1.8.291.sh -text +RTCP/GPUProc/clAmdFft/samples/CMakeLists.txt -text +RTCP/GPUProc/clAmdFft/samples/amd-unicode.h -text +RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.cpp -text +RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.h -text +RTCP/GPUProc/clAmdFft/samples/clAmdFft.h -text +RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.cpp -text +RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.h -text +RTCP/GPUProc/clAmdFft/samples/clMemcpy.cpp -text +RTCP/GPUProc/clAmdFft/samples/statisticalTimer.cpp -text +RTCP/GPUProc/clAmdFft/samples/statisticalTimer.h -text +RTCP/GPUProc/clAmdFft/samples/stdafx.cpp -text +RTCP/GPUProc/clAmdFft/samples/stdafx.h -text +RTCP/GPUProc/clAmdFft/samples/targetver.h -text +RTCP/GPUProc/src/Align.h -text +RTCP/GPUProc/src/BandPass.cc -text +RTCP/GPUProc/src/BandPass.h -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl-0.ptx -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.4x3 -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.6x3 -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.bak -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.not -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.ok -text +RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.orig -text +RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl -text +RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl-0.ptx -text +RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl.ok -text +RTCP/GPUProc/src/BeamFormer/Dedispersion.cl -text +RTCP/GPUProc/src/BeamFormer/Dedispersion.cl-0.ptx -text +RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl -text +RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl-0.ptx -text +RTCP/GPUProc/src/BeamFormer/IntToFloat.cl -text +RTCP/GPUProc/src/BeamFormer/IntToFloat.cl-0.ptx -text +RTCP/GPUProc/src/BeamFormer/Transpose.cl -text +RTCP/GPUProc/src/BeamFormer/Transpose.cl-0.ptx -text +RTCP/GPUProc/src/BeamletBuffer.cc -text +RTCP/GPUProc/src/BeamletBuffer.h 
-text +RTCP/GPUProc/src/BeamletBufferToComputeNode.cc -text +RTCP/GPUProc/src/BeamletBufferToComputeNode.h -text +RTCP/GPUProc/src/CL/cl.hpp -text +RTCP/GPUProc/src/CMakeLists.txt -text +RTCP/GPUProc/src/Correlator.cl -text +RTCP/GPUProc/src/Correlator.cl-0.ptx -text +RTCP/GPUProc/src/DelayAndBandPass.cl -text +RTCP/GPUProc/src/DelayAndBandPass.cl-0.ptx -text +RTCP/GPUProc/src/Delays.cc -text +RTCP/GPUProc/src/Delays.h -text +RTCP/GPUProc/src/FFT.cl -text +RTCP/GPUProc/src/FIR.cl -text +RTCP/GPUProc/src/FIR.cl-0.ptx -text +RTCP/GPUProc/src/FilterBank.cc -text +RTCP/GPUProc/src/FilterBank.h -text +RTCP/GPUProc/src/InputSection.cc -text +RTCP/GPUProc/src/InputSection.h -text +RTCP/GPUProc/src/InputThread.cc -text +RTCP/GPUProc/src/InputThread.h -text +RTCP/GPUProc/src/Job.cc -text +RTCP/GPUProc/src/Job.h -text +RTCP/GPUProc/src/LockedRanges.h -text +RTCP/GPUProc/src/LogThread.cc -text +RTCP/GPUProc/src/LogThread.h -text +RTCP/GPUProc/src/NewCorrelator.cl -text +RTCP/GPUProc/src/NewCorrelator.cl-0.ptx -text +RTCP/GPUProc/src/OpenCL_Support.cc -text +RTCP/GPUProc/src/OpenCL_Support.h -text +RTCP/GPUProc/src/OpenMP_Support.h -text +RTCP/GPUProc/src/RSP.h -text +RTCP/GPUProc/src/RTCP.cc -text +RTCP/GPUProc/src/RTCP.cc.not -text +RTCP/GPUProc/src/RTCP.cc.ok -text +RTCP/GPUProc/src/ReaderWriterSynchronization.cc -text +RTCP/GPUProc/src/ReaderWriterSynchronization.h -text +RTCP/GPUProc/src/Scheduling.cc -text +RTCP/GPUProc/src/Scheduling.h -text +RTCP/GPUProc/src/SlidingPointer.h -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl-0.ptx -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl.4groups -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop-0.ptx -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl.not -text +RTCP/GPUProc/src/UHEP/BeamFormer.cl.ok -text +RTCP/GPUProc/src/UHEP/InvFFT.cl -text +RTCP/GPUProc/src/UHEP/InvFFT.cl-0.ptx -text +RTCP/GPUProc/src/UHEP/InvFIR.cl -text +RTCP/GPUProc/src/UHEP/InvFIR.cl-0.ptx 
-text +RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.cc -text +RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.h -text +RTCP/GPUProc/src/UHEP/Transpose.cl -text +RTCP/GPUProc/src/UHEP/Transpose.cl-0.ptx -text +RTCP/GPUProc/src/UHEP/Transpose.cl.ok -text +RTCP/GPUProc/src/UHEP/Trigger.cl -text +RTCP/GPUProc/src/UHEP/Trigger.cl-0.ptx -text +RTCP/GPUProc/src/UHEP/Trigger.cl.8 -text +RTCP/GPUProc/src/UHEP/Trigger.cl.ok -text +RTCP/GPUProc/src/WallClockTime.h -text +RTCP/GPUProc/src/fft.cl -text +RTCP/GPUProc/src/math.cl -text +RTCP/GPUProc/src/octave-core -text +RTCP/GPUProc/test/77_Stations.parset -text +RTCP/GPUProc/test/AARTFAAC.parset -text +RTCP/GPUProc/test/CMakeLists.txt -text +RTCP/GPUProc/test/small-test.parset -text RTCP/IONProc/src/CommandServer.cc -text RTCP/IONProc/src/CommandServer.h -text RTCP/IONProc/src/GlobalVars.cc -text diff --git a/RTCP/CMakeLists.txt b/RTCP/CMakeLists.txt index 4e00a553ddabd32bee372a1543f1d47e931ea5f5..efd082104d9028586c59877a85f24aad693d2cbd 100644 --- a/RTCP/CMakeLists.txt +++ b/RTCP/CMakeLists.txt @@ -6,6 +6,8 @@ lofar_add_package(CNProc) # BlueGene: Compute-Node Processing applications lofar_add_package(IONProc) # BlueGene: I/O Node Processing applications lofar_add_package(InputProc) # GPU cluster: Station Input Retrieval and Redistribution applications lofar_add_package(Storage) # Stores correlator output in MeasurmentSets +lofar_add_package(GPUProc) # GPU version +lofar_add_package(RTCPTools) # Several RTCP tools lofar_add_package(Run) # Run scripts to start RTCP components lofar_add_package(LofarStMan)# Storage Manager for the main table of a LOFAR MS lofar_add_package(MetaDataGatherer)# Pulls in meta data from OTDB etc diff --git a/RTCP/GPUProc/CMakeLists.txt b/RTCP/GPUProc/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..17281054eed5f5a514cdb7279a0a41e98df1865e --- /dev/null +++ b/RTCP/GPUProc/CMakeLists.txt @@ -0,0 +1,26 @@ +# $Id: CMakeLists.txt 16350 2010-09-20 13:14:52Z nieuwpoort 
$ + +lofar_package(GPUProc 1.0 DEPENDS Common Stream Interface OpenCL_FFT) + +#add_definitions(-DBOOST_ENABLE_ASSERT_HANDLER) + +include(LofarFindPackage) +lofar_find_package(OpenMP REQUIRED) +lofar_find_package(OpenCL REQUIRED) +lofar_find_package(Boost REQUIRED) +lofar_find_package(Casacore COMPONENTS measures REQUIRED) +lofar_find_package(FFTW3 COMPONENTS single) +if(NOT FFTW3_FOUND) + lofar_find_package(FFTW2 COMPONENTS single real) + if(NOT FFTW2_FOUND) + message(SEND_ERROR "Should have FFTW3 or FFTW2 installed.") + endif(NOT FFTW2_FOUND) +endif(NOT FFTW3_FOUND) + +lofar_find_package(Valgrind) +if(USE_VALGRIND) + add_definitions(-DUSE_VALGRIND) +endif(USE_VALGRIND) + +add_subdirectory(src) +add_subdirectory(test) diff --git a/RTCP/GPUProc/OpenCL_FFT/CMakeLists.txt b/RTCP/GPUProc/OpenCL_FFT/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd67bd80a7144ac55300b9fe9c5e2d7d8d9bfe5f --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/CMakeLists.txt @@ -0,0 +1,12 @@ +# $Id: CMakeLists.txt 17975 2011-05-10 09:52:51Z mol $ + +lofar_package(OpenCL_FFT 1.0) + +include(LofarFindPackage) +lofar_find_package(OpenCL REQUIRED) + +if(USE_VALGRIND) + add_definitions(-DUSE_VALGRIND) +endif(USE_VALGRIND) + +add_subdirectory(src) diff --git a/RTCP/GPUProc/OpenCL_FFT/src/AccelerateError.pdf b/RTCP/GPUProc/OpenCL_FFT/src/AccelerateError.pdf new file mode 100644 index 0000000000000000000000000000000000000000..55cd832de7c8b8a1e4ce0e1a640c0d659480b69e Binary files /dev/null and b/RTCP/GPUProc/OpenCL_FFT/src/AccelerateError.pdf differ diff --git a/RTCP/GPUProc/OpenCL_FFT/src/CMakeLists.txt b/RTCP/GPUProc/OpenCL_FFT/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5aaab421d0236d24142d133b2c89da1089288397 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/CMakeLists.txt @@ -0,0 +1,23 @@ +# $Id: CMakeLists.txt 17003 2011-01-06 08:54:59Z romein $ + +include(LofarPackageVersion) + +# Create symbolic link to include directory. 
+execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_BINARY_DIR}/include/${PACKAGE_NAME}) + +# Add current source directory to -I path. This is needed because OpenCL_FFT uses +# angle brackets for internal header files, instead of quotes. +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +set(OpenCL_FFT_LIB_SRCS + #Package__Version.cc + fft_execute.cpp + fft_kernelstring.cpp + fft_setup.cpp) + +lofar_add_library(opencl_fft ${OpenCL_FFT_LIB_SRCS}) + +#lofar_add_bin_program(versionopencl_fft versionopencl_fft.cc) #FIXME + diff --git a/RTCP/GPUProc/OpenCL_FFT/src/Error.pdf b/RTCP/GPUProc/OpenCL_FFT/src/Error.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1d252a9c1a0acdfac432caa11e11d705e9d8acc0 Binary files /dev/null and b/RTCP/GPUProc/OpenCL_FFT/src/Error.pdf differ diff --git a/RTCP/GPUProc/OpenCL_FFT/src/Makefile b/RTCP/GPUProc/OpenCL_FFT/src/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3a25689ec2b15f91e630c4d731748f42f5dc41c2 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/Makefile @@ -0,0 +1,19 @@ +SRCS = fft_execute.cpp fft_setup.cpp fft_kernelstring.cpp +HEADERS = procs.h fft_internal.h fft_base_kernels.h clFFT.h +LIBRARY = libOpenCL_FFT.a +COMPILERFLAGS = -c -g -Wall -Werror -O3 -I.. 
+CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +CC = g++ + +OBJECTS = fft_execute.o fft_setup.o fft_kernelstring.o +TARGETOBJECT = +all: $(LIBRARY) + +$(OBJECTS): $(SRCS) $(HEADERS) + $(CC) $(CFLAGS) $(SRCS) + +$(LIBRARY): $(OBJECTS) + ar r $@ $^ + +clean: + rm -f $(TARGET) $(OBJECTS) diff --git a/RTCP/GPUProc/OpenCL_FFT/src/Makefile.not b/RTCP/GPUProc/OpenCL_FFT/src/Makefile.not new file mode 100644 index 0000000000000000000000000000000000000000..1c65cd84a7c59a494d0891ee9ac7443c1dbcc463 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/Makefile.not @@ -0,0 +1,28 @@ +ifdef BUILD_WITH_ATF +ATF = -framework ATF +USE_ATF = -DUSE_ATF +endif + +SRCS = fft_execute.cpp fft_setup.cpp main.cpp fft_kernelstring.cpp +HEADERS = procs.h fft_internal.h fft_base_kernels.h clFFT.h +TARGET = test_clFFT +COMPILERFLAGS = -c -g -Wall -Werror -O3 +CFLAGS = $(COMPILERFLAGS) ${RC_CFLAGS} ${USE_ATF} +CC = g++ +LIBRARIES = -framework OpenCL -framework Accelerate -framework AppKit ${RC_CFLAGS} ${ATF} + +OBJECTS = fft_execute.o fft_setup.o main.o fft_kernelstring.o +TARGETOBJECT = +all: $(TARGET) + +$(OBJECTS): $(SRCS) $(HEADERS) + $(CC) $(CFLAGS) $(SRCS) + +$(TARGET): $(OBJECTS) + $(CC) $(OBJECTS) -o $@ $(LIBRARIES) + +clean: + rm -f $(TARGET) $(OBJECTS) + +.DEFAULT: + @echo The target \"$@\" does not exist in Makefile. diff --git a/RTCP/GPUProc/OpenCL_FFT/src/OpenCLError.pdf b/RTCP/GPUProc/OpenCL_FFT/src/OpenCLError.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c42309ff5196000bf9c83f5a7347564312db5a94 Binary files /dev/null and b/RTCP/GPUProc/OpenCL_FFT/src/OpenCLError.pdf differ diff --git a/RTCP/GPUProc/OpenCL_FFT/src/OpenCL_FFT.xcodeproj/project.pbxproj b/RTCP/GPUProc/OpenCL_FFT/src/OpenCL_FFT.xcodeproj/project.pbxproj new file mode 100644 index 0000000000000000000000000000000000000000..5474161f048eb2b61197875e1ff642b9289aad9e --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/OpenCL_FFT.xcodeproj/project.pbxproj @@ -0,0 +1,237 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 45; + objects = { + +/* Begin PBXBuildFile section */ + BE94A7B3108AB33000C1AD87 /* fft_kernelstring.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BE94A7B2108AB33000C1AD87 /* fft_kernelstring.cpp */; }; + BE94A83D108AF8A100C1AD87 /* fft_setup.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BE94A83C108AF8A100C1AD87 /* fft_setup.cpp */; }; + BEE709AF1097B8DD0017B8A5 /* main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BEE709AE1097B8DD0017B8A5 /* main.cpp */; }; + BEEA39EE108BD89D00729F49 /* fft_execute.cpp in Sources */ = {isa = PBXBuildFile; fileRef = BEEA39ED108BD89D00729F49 /* fft_execute.cpp */; }; +/* End PBXBuildFile section */ + +/* Begin PBXCopyFilesBuildPhase section */ + 8DD76F690486A84900D96B5E /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 8; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; +/* End PBXCopyFilesBuildPhase section */ + +/* Begin PBXFileReference section */ + 8DD76F6C0486A84900D96B5E /* OpenCL_FFT */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = OpenCL_FFT; sourceTree = BUILT_PRODUCTS_DIR; }; + BE94A7B2108AB33000C1AD87 /* fft_kernelstring.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fft_kernelstring.cpp; sourceTree = "<group>"; }; + BE94A7CB108AB8BF00C1AD87 /* clFFT.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clFFT.h; sourceTree = "<group>"; }; + BE94A7D4108ABFF000C1AD87 /* fft_internal.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fft_internal.h; sourceTree = "<group>"; }; + BE94A83C108AF8A100C1AD87 /* fft_setup.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fft_setup.cpp; sourceTree = "<group>"; }; + 
BE9DE3E010923A4E00940D66 /* fft_base_kernels.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fft_base_kernels.h; sourceTree = "<group>"; }; + BE9DE4741092732C00940D66 /* param.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = param.txt; sourceTree = "<group>"; }; + BE9DE4761092732C00940D66 /* procs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = procs.h; sourceTree = "<group>"; }; + BEE709AE1097B8DD0017B8A5 /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = main.cpp; sourceTree = "<group>"; }; + BEEA39ED108BD89D00729F49 /* fft_execute.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fft_execute.cpp; sourceTree = "<group>"; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 8DD76F660486A84900D96B5E /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 08FB7794FE84155DC02AAC07 /* OpenCL_FFT */ = { + isa = PBXGroup; + children = ( + BE9DE4741092732C00940D66 /* param.txt */, + 08FB7795FE84155DC02AAC07 /* Source */, + C6859E8C029090F304C91782 /* Documentation */, + 1AB674ADFE9D54B511CA2CBB /* Products */, + ); + name = OpenCL_FFT; + sourceTree = "<group>"; + }; + 08FB7795FE84155DC02AAC07 /* Source */ = { + isa = PBXGroup; + children = ( + BE9DE4761092732C00940D66 /* procs.h */, + BEEA39ED108BD89D00729F49 /* fft_execute.cpp */, + BE94A83C108AF8A100C1AD87 /* fft_setup.cpp */, + BEE709AE1097B8DD0017B8A5 /* main.cpp */, + BE94A7D4108ABFF000C1AD87 /* fft_internal.h */, + BE9DE3E010923A4E00940D66 /* fft_base_kernels.h */, + BE94A7CB108AB8BF00C1AD87 /* clFFT.h */, + BE94A7B2108AB33000C1AD87 /* fft_kernelstring.cpp */, + ); + name = Source; + 
sourceTree = "<group>"; + }; + 1AB674ADFE9D54B511CA2CBB /* Products */ = { + isa = PBXGroup; + children = ( + 8DD76F6C0486A84900D96B5E /* OpenCL_FFT */, + ); + name = Products; + sourceTree = "<group>"; + }; + C6859E8C029090F304C91782 /* Documentation */ = { + isa = PBXGroup; + children = ( + ); + name = Documentation; + sourceTree = "<group>"; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 8DD76F620486A84900D96B5E /* OpenCL_FFT */ = { + isa = PBXNativeTarget; + buildConfigurationList = 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "OpenCL_FFT" */; + buildPhases = ( + 8DD76F640486A84900D96B5E /* Sources */, + 8DD76F660486A84900D96B5E /* Frameworks */, + 8DD76F690486A84900D96B5E /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = OpenCL_FFT; + productInstallPath = "$(HOME)/bin"; + productName = OpenCL_FFT; + productReference = 8DD76F6C0486A84900D96B5E /* OpenCL_FFT */; + productType = "com.apple.product-type.tool"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 08FB7793FE84155DC02AAC07 /* Project object */ = { + isa = PBXProject; + buildConfigurationList = 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "OpenCL_FFT" */; + compatibilityVersion = "Xcode 3.1"; + hasScannedForEncodings = 1; + mainGroup = 08FB7794FE84155DC02AAC07 /* OpenCL_FFT */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 8DD76F620486A84900D96B5E /* OpenCL_FFT */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXSourcesBuildPhase section */ + 8DD76F640486A84900D96B5E /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + BE94A7B3108AB33000C1AD87 /* fft_kernelstring.cpp in Sources */, + BE94A83D108AF8A100C1AD87 /* fft_setup.cpp in Sources */, + BEEA39EE108BD89D00729F49 /* fft_execute.cpp in Sources */, + BEE709AF1097B8DD0017B8A5 /* main.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End 
PBXSourcesBuildPhase section */ + +/* Begin XCBuildConfiguration section */ + 1DEB923208733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + COPY_PHASE_STRIP = NO; + GCC_DYNAMIC_NO_PIC = NO; + GCC_ENABLE_FIX_AND_CONTINUE = YES; + GCC_MODEL_TUNING = G5; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "_GLIBCXX_DEBUG=1", + "_GLIBCXX_DEBUG_PEDANTIC=1", + ); + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = OpenCL_FFT; + }; + name = Debug; + }; + 1DEB923308733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + GCC_MODEL_TUNING = G5; + INSTALL_PATH = /usr/local/bin; + PRODUCT_NAME = OpenCL_FFT; + }; + name = Release; + }; + 1DEB923608733DC60010E9CD /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(NATIVE_ARCH_ACTUAL)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + ONLY_ACTIVE_ARCH = YES; + OTHER_LDFLAGS = ( + "-framework", + OpenCL, + "-framework", + Accelerate, + ); + PREBINDING = NO; + SDKROOT = ""; + }; + name = Debug; + }; + 1DEB923708733DC60010E9CD /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ARCHS = "$(NATIVE_ARCH_ACTUAL)"; + GCC_C_LANGUAGE_STANDARD = gnu99; + GCC_WARN_ABOUT_RETURN_TYPE = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + OTHER_LDFLAGS = ( + "-framework", + OpenCL, + "-framework", + Accelerate, + ); + PREBINDING = NO; + SDKROOT = ""; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 1DEB923108733DC60010E9CD /* Build configuration list for PBXNativeTarget "OpenCL_FFT" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923208733DC60010E9CD /* Debug */, + 1DEB923308733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + 
defaultConfigurationName = Release; + }; + 1DEB923508733DC60010E9CD /* Build configuration list for PBXProject "OpenCL_FFT" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 1DEB923608733DC60010E9CD /* Debug */, + 1DEB923708733DC60010E9CD /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; +} diff --git a/RTCP/GPUProc/OpenCL_FFT/src/ReadMe.txt b/RTCP/GPUProc/OpenCL_FFT/src/ReadMe.txt new file mode 100644 index 0000000000000000000000000000000000000000..b90455fe13d5f7654f4d651fa81bf1dfc51181cd --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/ReadMe.txt @@ -0,0 +1 @@ +### OpenCL FFT (Fast Fourier Transform) ### =========================================================================== DESCRIPTION: This example shows how OpenCL can be used to compute FFT. Algorithm implemented is described in the following references 1) Fitting FFT onto the G80 Architecture by Vasily Volkov and Brian Kazian University of California, Berkeley, May 19, 2008 http://www.cs.berkeley.edu/~kubitron/courses/cs258-S08/projects/reports/project6_report.pdf 2) High Performance Discrete Fourier Tansforms on Graphics Processors by Naga K. Govindaraju, Brandon Lloyd, Yuri Dotsenko, Burton Smith, and John Manferdelli Supercomputing 2008. http://portal.acm.org/citation.cfm?id=1413373 Current version only supports power of two transform sizes however it should be straight forward to extend the sample to non-power of two but power of base primes like 3, 5, 7. Current version supports 1D, 2D, 3D batched transforms. Current version supports both in-place and out-of-place transforms. Current version supports both forward and inverse transform. Current version supports both plannar and interleaved data format. Current version only supports complex-to-complex transform. 
For real transform, one can use planar data format with imaginary array mem set to zero. Current version only supports transform on GPU device. Accelerate framework can be used on CPU. Current version supports sizes that fit in device global memory although "Twist Kernel" is included in fft plan if user wants to virtualize (implement sizes larger than what can fit in GPU global memory). Users can dump all the kernels and global, local dimensions with which these kernels are run so that they can not only inspect/modify these kernels and understand how FFT is being computed on GPU, but also create their own standalone app for executing FFT of size of their interest. For any given signal size n, the sample creates a clFFT_Plan, that encapsulates the kernel string, associated compiled cl_program. Note that kernel string is generated at runtime based on input size, dimension (1D, 2D, 3D) and data format (planar or interleaved) along with some device-dependent parameters encapsulated in the clFFT_Plan. These device-dependent parameters are set such that kernel is generated for high performance meeting following requirements 1) Access pattern to global memory (off-chip DRAM) is such that memory transaction coalescing is achieved if device supports it thus achieving full bandwidth 2) Local shuffles (matrix transposes or data sharing among work items of a workgroup) are bank-conflict free if local memory is banked. 3) Kernel is fully optimized for memory hierarchy meaning that it uses GPU's large vector register file, which is fastest, first before reverting to local memory for data sharing among work items to save global DRAM bandwidth and only then reverts to global memory if signal size is such that transform cannot be computed by a single workgroup and thus requires global communication among work groups. Users can modify these parameters to get best performance on their particular GPU. 
Users who really want to understand the details of implementation are highly encouraged to read above two references but here is a high level description. At a high level, the algorithm decomposes signal of length N into factors as N = N1 x N2 x N3 x N4 x .... Nn where the factors (N1, ....., Nn) are sorted such that N1 is largest. It thus decomposes N into n-dimensional matrix. It then applies fft along each dimension, multiplies by twiddle factors and transposes the matrix as follows: N2 x N3 x N4 x ............ x Nn x N1 (fft along N1 and transpose) N3 x N4 x N5 x .... x Nn x N2 x N1 (fft along N2 and transpose) N4 x N5 x N6 x .. x Nn x N3 x N2 x N1 (fft along N3 and transpose) ...... Nn x Nn-1 x Nn-2 x ........ N3 x N2 x N1 (fft along Nn and transpose) Decomposition is such that algorithm is fully optimized for memory hierarchy. N1 (largest base radix) is constrained by maximum register usage by work item (largest size of in-register fft) and product N2 x N3 .... x Nn determines the maximum size of work group which is constrained by local memory used by work group (local memory is used to share data among work items i.e. local transposes). Together, these two parameters determine the maximum size fft that can be computed by just using register file and local memory without reverting to global memory for transpose (i.e. these sizes do not require global transpose and thus no inter work group communication). However, for larger sizes, global communication among workgroup is required and multiple kernel launches are needed depending on the size and the base radix used. For details of parameters user can play with, please see the comments in fft_internal.h and fft_kernelstring.cpp, which has the main kernel generator functions ... especially see the comments preceding the functions getRadixArray and getGlobalRadixInfo. Users can adjust these parameters to achieve the best performance on their device. 
Description of API Calls ========================= clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ); This function creates a plan and returns a handle to it for use with other functions below. context context in which things are happening n n.x, n.y, n.z contain the dimension of signal (length along each dimension) dim must be one of clFFT_1D, clFFT_2D, clFFT_3D for one, two or three dimensional fft dataFormat must be either clFFT_InterleavedComplexFormat or clFFT_SplitComplexFormat for either interleaved or planar data (real and imaginary) error_code pointer for getting error back in plan creation. In case of error NULL plan is returned ========================== void clFFT_DestroyPlan( clFFT_Plan plan ); Function to release/free resources ========================== cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in, cl_mem data_out, cl_int num_events, cl_event *event_list, cl_event *event ); Function for interleaved fft execution. queue command queue for the device on which fft needs to be executed. It should belong to the context for which this plan was created plan fft plan that was created using clFFT_CreatePlan batchSize size of the batch for batched transform dir must be either clFFT_Forward or clFFT_Inverse for forward or inverse transform data_in input data data_out output data. For in-place transform, pass same mem object for both data_in and data_out num_events, event_list and event are for future use for letting fft fit in other CL based application pipeline through event dependency. Not implemented in this version yet so these parameters are redundant right now. Just pass NULL. 
========================= cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir, cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, cl_int num_events, cl_event *event_list, cl_event *event ); Same as above but for planar data type. ========================= cl_int clFFT_1DTwistInterleaved( clFFT_Plan plan, cl_mem mem, size_t numRows, size_t numCols, size_t startRow, clFFT_Direction dir ); Function for applying twist (twiddle factor multiplication) for virtualizing computation of very large ffts that cannot fit into global memory at once but can be decomposed into many global memory fitting ffts followed by twiddle multiplication (twist) followed by transpose followed by again many global memory fitting ffts. ========================= cl_int clFFT_1DTwistPlanner( clFFT_Plan plan, cl_mem mem_real, cl_mem mem_imag, size_t numRows, size_t numCols, size_t startRow, clFFT_Direction dir ); Same function as above but for planar data. ========================= void clFFT_DumpPlan( clFFT_Plan plan, FILE *file); Function to dump the plan. Passing stdout to file prints out the plan to standard out. It prints out the kernel string and local, global dimension with which each kernel is executed in this plan. ================================================================================== IMPORTANT NOTE ON PERFORMANCE: Currently there are a few known performance issues (bugs) that this sample has discovered in the runtime and code generation that are being actively fixed. Hence, for sizes >= 1024, performance is much below the expected peak for any particular size. However, we have internally verified that once these bugs are fixed, performance should be on par with expected peak. Note that these are bugs in OpenCL runtime/compiler and not in this sample. 
=========================================================================== BUILD REQUIREMENTS: Mac OS X v10.6 or later If you are running in Xcode, be sure to pass file name "param.txt". You can do that by double clicking OpenCL_FFT under executable and then click on Argument tab and add ./../../param.txt under "Arguments to be passed on launch" section. =========================================================================== RUNTIME REQUIREMENTS: . Mac OS X v10.6 or later with OpenCL 1.0 . For good performance, device should support local memory. FFT performance critically depend on how efficiently local shuffles (matrix transposes) using local memory to reduce external DRAM bandwidth requirement. =========================================================================== PACKAGING LIST: AccelerateError.pdf clFFT.h Error.pdf fft_base_kernels.h fft_execute.cpp fft_internal.h fft_kernelstring.cpp fft_setup.cpp main.cpp Makefile OpenCL_FFT.xcodeproj OpenCLError.pdf param.txt procs.h ReadMe.txt =========================================================================== CHANGES FROM PREVIOUS VERSIONS: Version 1.0 - First version. =========================================================================== Copyright (C) 2008 Apple Inc. All rights reserved. \ No newline at end of file diff --git a/RTCP/GPUProc/OpenCL_FFT/src/clFFT.h b/RTCP/GPUProc/OpenCL_FFT/src/clFFT.h new file mode 100644 index 0000000000000000000000000000000000000000..e893d95393a63af2a825778e3654b84c3ce62c4f --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/clFFT.h @@ -0,0 +1,129 @@ + +// +// File: clFFT.h +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. 
If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+//
+// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
+// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
+// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
+// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
+// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef __CLFFT_H
+#define __CLFFT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <CL/cl.h>
+#include <stdio.h>
+
+// XForm type: direction of the transform. The numeric value is forwarded
+// unchanged to the FFT and twist kernels as their `dir` argument.
+typedef enum
+{
+    clFFT_Forward = -1,
+    clFFT_Inverse = 1
+
+}clFFT_Direction;
+
+// XForm dimension: dimensionality of the transform.
+// NOTE(review): clFFT_3D is 3, not 2, so the values are not contiguous; do not
+// use them as array indices without checking.
+typedef enum
+{
+    clFFT_1D = 0,
+    clFFT_2D = 1,
+    clFFT_3D = 3
+
+}clFFT_Dimension;
+
+// XForm Data type: memory layout of the complex data a plan operates on.
+// Split (plannar) plans are driven through clFFT_ExecutePlannar with separate
+// real and imaginary buffers; interleaved plans through
+// clFFT_ExecuteInterleaved with a single buffer.
+typedef enum
+{
+    clFFT_SplitComplexFormat = 0,
+    clFFT_InterleavedComplexFormat = 1
+}clFFT_DataFormat;
+
+// Size of the signal along x, y and z.
+typedef struct
+{
+    unsigned int x;
+    unsigned int y;
+    unsigned int z;
+}clFFT_Dim3;
+
+// Host-side complex array in split form: separate pointers to the real and
+// imaginary components.
+typedef struct
+{
+    float *real;
+    float *imag;
+} clFFT_SplitComplex;
+
+// Host-side single complex value in interleaved form.
+typedef struct
+{
+    float real;
+    float imag;
+}clFFT_Complex;
+
+// Opaque plan handle. The library casts it back to its internal plan
+// structure; callers must only pass it to the functions declared below.
+typedef void* clFFT_Plan;
+
+// Creates a plan for a transform of size `n`, dimensionality `dim` and data
+// layout `dataFormat` in `context`.
+// NOTE(review): the implementation is not in this header; presumably the
+// status is reported through `error_code` -- confirm in fft_setup.cpp.
+clFFT_Plan clFFT_CreatePlan( cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code );
+
+// Destroys a plan. NOTE(review): presumably releases everything owned by a
+// plan obtained from clFFT_CreatePlan -- confirm in fft_setup.cpp.
+void clFFT_DestroyPlan( clFFT_Plan plan );
+
+// Enqueues the kernels of an interleaved-format plan on `queue`, transforming
+// `batchSize` signals from `data_in` into `data_out` (the same buffer may be
+// passed for both). Returns CL_INVALID_VALUE if the plan was not created with
+// clFFT_InterleavedComplexFormat, otherwise an OpenCL status code.
+// `num_events`, `event_list` and `event` are forwarded to
+// clEnqueueNDRangeKernel.
+cl_int clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
+                                 cl_mem data_in, cl_mem data_out,
+                                 cl_int num_events, cl_event *event_list, cl_event *event );
+
+// Same as clFFT_ExecuteInterleaved, but for plans created with
+// clFFT_SplitComplexFormat: real and imaginary parts live in separate buffers.
+// Returns CL_INVALID_VALUE for plans of any other format.
+cl_int clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan plan, cl_int batchSize, clFFT_Direction dir,
+                             cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag,
+                             cl_int num_events, cl_event *event_list, cl_event *event );
+
+// Enqueues the twist (twiddle factor multiplication) kernel of `Plan` on
+// `queue`. It multiplies, in place, `rowsToProcess` rows of `numCols`
+// interleaved complex values stored in `array`; `startRow` is the index of the
+// first of those rows within the full numRows x numCols signal.
+cl_int clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array,
+                                size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
+
+
+// Same as clFFT_1DTwistInterleaved, but for data split into separate real and
+// imaginary buffers.
+cl_int clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag,
+                            size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir);
+
+// Dumps the plan to `file`; pass stdout to print it to standard output.
+void clFFT_DumpPlan( clFFT_Plan plan, FILE *file);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/RTCP/GPUProc/OpenCL_FFT/src/fft_base_kernels.h b/RTCP/GPUProc/OpenCL_FFT/src/fft_base_kernels.h
new file mode 100644
index 0000000000000000000000000000000000000000..101795697f55e125e4fbafa8757aef5de033fad6
--- /dev/null
+++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_base_kernels.h
@@ -0,0 +1,277 @@
+
+//
+// File: fft_base_kernels.h
+//
+// Version: <1.0>
+//
+// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
+// in consideration of your agreement to the following terms, and your use,
+// installation, modification or redistribution of this Apple software
+// constitutes acceptance of these terms. If you do not agree with these
+// terms, please do not use, install, modify or redistribute this Apple
+// software.
+//
+// In consideration of your agreement to abide by the following terms, and
+// subject to these terms, Apple grants you a personal, non - exclusive
+// license, under Apple's copyrights in this original Apple software ( the
+// "Apple Software" ), to use, reproduce, modify and redistribute the Apple
+// Software, with or without modifications, in source and / or binary forms;
+// provided that if you redistribute the Apple Software in its entirety and
+// without modifications, you must retain this notice and the following text
+// and disclaimers in all such redistributions of the Apple Software.
Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. +// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. 
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef __CL_FFT_BASE_KERNELS_
+#define __CL_FFT_BASE_KERNELS_
+
+#include <string>
+
+using namespace std;
+
+// OpenCL C source (preprocessor macros only) shared by the generated FFT
+// kernels. Every macro works in place on float2 values, where .x is the real
+// part and .y the imaginary part, and `dir` is the transform direction
+// (-1 or +1, see clFFT_Direction) that scales the imaginary part of the
+// twiddle factors.
+//
+//   complexMul(a,b)  complex product (a.x*b.x - a.y*b.y, a.y*b.x + a.x*b.y)
+//   conj(a)          complex conjugate
+//   conjTransp(a)    (-a.y, a.x), i.e. a multiplied by i
+//   fftKernel2/2S    2-point butterfly: (x, y) -> (x + y, x - y); `dir` is unused
+//   fftKernel4/4s, fftKernel8, fftKernel16, fftKernel32
+//                    larger transforms composed from the smaller ones, each
+//                    ending with a reordering step (bitreverse* or swaps)
+//
+// NOTE(review): `static` in a header gives every including translation unit
+// its own copy of these strings.
+static string baseKernels = string(
+    // M_PI fallback as a hexadecimal floating-point literal.
+    "#ifndef M_PI\n"
+    "#define M_PI 0x1.921fb54442d18p+1\n"
+    "#endif\n"
+    "#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y)))\n"
+    "#define conj(a) ((float2)((a).x, -(a).y))\n"
+    "#define conjTransp(a) ((float2)(-(a).y, (a).x))\n"
+    "\n"
+    // 2-point butterfly on the array elements a[0], a[1].
+    "#define fftKernel2(a,dir) \\\n"
+    "{ \\\n"
+    " float2 c = (a)[0]; \\\n"
+    " (a)[0] = c + (a)[1]; \\\n"
+    " (a)[1] = c - (a)[1]; \\\n"
+    "}\n"
+    "\n"
+    // 2-point butterfly on two separately named values.
+    "#define fftKernel2S(d1,d2,dir) \\\n"
+    "{ \\\n"
+    " float2 c = (d1); \\\n"
+    " (d1) = c + (d2); \\\n"
+    " (d2) = c - (d2); \\\n"
+    "}\n"
+    "\n"
+    // 4-point transform on a[0..3]; the final three statements swap a[1] and a[2].
+    "#define fftKernel4(a,dir) \\\n"
+    "{ \\\n"
+    " fftKernel2S((a)[0], (a)[2], dir); \\\n"
+    " fftKernel2S((a)[1], (a)[3], dir); \\\n"
+    " fftKernel2S((a)[0], (a)[1], dir); \\\n"
+    " (a)[3] = (float2)(dir)*(conjTransp((a)[3])); \\\n"
+    " fftKernel2S((a)[2], (a)[3], dir); \\\n"
+    " float2 c = (a)[1]; \\\n"
+    " (a)[1] = (a)[2]; \\\n"
+    " (a)[2] = c; \\\n"
+    "}\n"
+    "\n"
+    // Same as fftKernel4, but on four separately named values.
+    "#define fftKernel4s(a0,a1,a2,a3,dir) \\\n"
+    "{ \\\n"
+    " fftKernel2S((a0), (a2), dir); \\\n"
+    " fftKernel2S((a1), (a3), dir); \\\n"
+    " fftKernel2S((a0), (a1), dir); \\\n"
+    " (a3) = (float2)(dir)*(conjTransp((a3))); \\\n"
+    " fftKernel2S((a2), (a3), dir); \\\n"
+    " float2 c = (a1); \\\n"
+    " (a1) = (a2); \\\n"
+    " (a2) = c; \\\n"
+    "}\n"
+    "\n"
+    // Swaps a[1] <-> a[4] and a[3] <-> a[6].
+    "#define bitreverse8(a) \\\n"
+    "{ \\\n"
+    " float2 c; \\\n"
+    " c = (a)[1]; \\\n"
+    " (a)[1] = (a)[4]; \\\n"
+    " (a)[4] = c; \\\n"
+    " c = (a)[3]; \\\n"
+    " (a)[3] = (a)[6]; \\\n"
+    " (a)[6] = c; \\\n"
+    "}\n"
+    "\n"
+    // 8-point transform on a[0..7]: three butterfly stages with twiddle
+    // multiplications in between, then bitreverse8.
+    "#define fftKernel8(a,dir) \\\n"
+    "{ \\\n"
+    " const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \\\n"
+    " const float2 w3 = (float2)(-0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \\\n"
+    " float2 c; \\\n"
+    " fftKernel2S((a)[0], (a)[4], dir); \\\n"
+    " fftKernel2S((a)[1], (a)[5], dir); \\\n"
+    " fftKernel2S((a)[2], (a)[6], dir); \\\n"
+    " fftKernel2S((a)[3], (a)[7], dir); \\\n"
+    " (a)[5] = complexMul(w1, (a)[5]); \\\n"
+    " (a)[6] = (float2)(dir)*(conjTransp((a)[6])); \\\n"
+    " (a)[7] = complexMul(w3, (a)[7]); \\\n"
+    " fftKernel2S((a)[0], (a)[2], dir); \\\n"
+    " fftKernel2S((a)[1], (a)[3], dir); \\\n"
+    " fftKernel2S((a)[4], (a)[6], dir); \\\n"
+    " fftKernel2S((a)[5], (a)[7], dir); \\\n"
+    " (a)[3] = (float2)(dir)*(conjTransp((a)[3])); \\\n"
+    " (a)[7] = (float2)(dir)*(conjTransp((a)[7])); \\\n"
+    " fftKernel2S((a)[0], (a)[1], dir); \\\n"
+    " fftKernel2S((a)[2], (a)[3], dir); \\\n"
+    " fftKernel2S((a)[4], (a)[5], dir); \\\n"
+    " fftKernel2S((a)[6], (a)[7], dir); \\\n"
+    " bitreverse8((a)); \\\n"
+    "}\n"
+    "\n"
+    // Swaps a[i] <-> a[j] for the six index pairs listed; viewed as a 4x4
+    // row-major matrix this is a transpose.
+    "#define bitreverse4x4(a) \\\n"
+    "{ \\\n"
+    " float2 c; \\\n"
+    " c = (a)[1]; (a)[1] = (a)[4]; (a)[4] = c; \\\n"
+    " c = (a)[2]; (a)[2] = (a)[8]; (a)[8] = c; \\\n"
+    " c = (a)[3]; (a)[3] = (a)[12]; (a)[12] = c; \\\n"
+    " c = (a)[6]; (a)[6] = (a)[9]; (a)[9] = c; \\\n"
+    " c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c; \\\n"
+    " c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c; \\\n"
+    "}\n"
+    "\n"
+    // 16-point transform on a[0..15]: four strided fftKernel4s, twiddle
+    // multiplications, four contiguous fftKernel4, then bitreverse4x4.
+    "#define fftKernel16(a,dir) \\\n"
+    "{ \\\n"
+    " const float w0 = 0x1.d906bcp-1f; \\\n"
+    " const float w1 = 0x1.87de2ap-2f; \\\n"
+    " const float w2 = 0x1.6a09e6p-1f; \\\n"
+    " fftKernel4s((a)[0], (a)[4], (a)[8], (a)[12], dir); \\\n"
+    " fftKernel4s((a)[1], (a)[5], (a)[9], (a)[13], dir); \\\n"
+    " fftKernel4s((a)[2], (a)[6], (a)[10], (a)[14], dir); \\\n"
+    " fftKernel4s((a)[3], (a)[7], (a)[11], (a)[15], dir); \\\n"
+    " (a)[5] = complexMul((a)[5], (float2)(w0, dir*w1)); \\\n"
+    " (a)[6] = complexMul((a)[6], (float2)(w2, dir*w2)); \\\n"
+    " (a)[7] = complexMul((a)[7], (float2)(w1, dir*w0)); \\\n"
+    " (a)[9] = complexMul((a)[9], (float2)(w2, dir*w2)); \\\n"
+    " (a)[10] = (float2)(dir)*(conjTransp((a)[10])); \\\n"
+    " (a)[11] = complexMul((a)[11], (float2)(-w2, dir*w2)); \\\n"
+    " (a)[13] = complexMul((a)[13], (float2)(w1, dir*w0)); \\\n"
+    " (a)[14] = complexMul((a)[14], (float2)(-w2, dir*w2)); \\\n"
+    " (a)[15] = complexMul((a)[15], (float2)(-w0, dir*-w1)); \\\n"
+    " fftKernel4((a), dir); \\\n"
+    " fftKernel4((a) + 4, dir); \\\n"
+    " fftKernel4((a) + 8, dir); \\\n"
+    " fftKernel4((a) + 12, dir); \\\n"
+    " bitreverse4x4((a)); \\\n"
+    "}\n"
+    "\n"
+    // Reorders a[0..31]; each source line rotates one cycle of five elements
+    // using the two temporaries c1 and c2.
+    "#define bitreverse32(a) \\\n"
+    "{ \\\n"
+    " float2 c1, c2; \\\n"
+    " c1 = (a)[2]; (a)[2] = (a)[1]; c2 = (a)[4]; (a)[4] = c1; c1 = (a)[8]; (a)[8] = c2; c2 = (a)[16]; (a)[16] = c1; (a)[1] = c2; \\\n"
+    " c1 = (a)[6]; (a)[6] = (a)[3]; c2 = (a)[12]; (a)[12] = c1; c1 = (a)[24]; (a)[24] = c2; c2 = (a)[17]; (a)[17] = c1; (a)[3] = c2; \\\n"
+    " c1 = (a)[10]; (a)[10] = (a)[5]; c2 = (a)[20]; (a)[20] = c1; c1 = (a)[9]; (a)[9] = c2; c2 = (a)[18]; (a)[18] = c1; (a)[5] = c2; \\\n"
+    " c1 = (a)[14]; (a)[14] = (a)[7]; c2 = (a)[28]; (a)[28] = c1; c1 = (a)[25]; (a)[25] = c2; c2 = (a)[19]; (a)[19] = c1; (a)[7] = c2; \\\n"
+    " c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2; \\\n"
+    " c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2; \\\n"
+    "}\n"
+    "\n"
+    // 32-point transform on a[0..31]: sixteen butterflies pairing a[k] with
+    // a[k+16], twiddle multiplications on a[17..31], two fftKernel16 on the
+    // halves, then bitreverse32.
+    "#define fftKernel32(a,dir) \\\n"
+    "{ \\\n"
+    " fftKernel2S((a)[0], (a)[16], dir); \\\n"
+    " fftKernel2S((a)[1], (a)[17], dir); \\\n"
+    " fftKernel2S((a)[2], (a)[18], dir); \\\n"
+    " fftKernel2S((a)[3], (a)[19], dir); \\\n"
+    " fftKernel2S((a)[4], (a)[20], dir); \\\n"
+    " fftKernel2S((a)[5], (a)[21], dir); \\\n"
+    " fftKernel2S((a)[6], (a)[22], dir); \\\n"
+    " fftKernel2S((a)[7], (a)[23], dir); \\\n"
+    " fftKernel2S((a)[8], (a)[24], dir); \\\n"
+    " fftKernel2S((a)[9], (a)[25], dir); \\\n"
+    " fftKernel2S((a)[10], (a)[26], dir); \\\n"
+    " fftKernel2S((a)[11], (a)[27], dir); \\\n"
+    " fftKernel2S((a)[12], (a)[28], dir); \\\n"
+    " fftKernel2S((a)[13], (a)[29], dir); \\\n"
+    " fftKernel2S((a)[14], (a)[30], dir); \\\n"
+    " fftKernel2S((a)[15], (a)[31], dir); \\\n"
+    " (a)[17] = complexMul((a)[17], (float2)(0x1.f6297cp-1f, dir*0x1.8f8b84p-3f)); \\\n"
+    " (a)[18] = complexMul((a)[18], (float2)(0x1.d906bcp-1f, dir*0x1.87de2ap-2f)); \\\n"
+    " (a)[19] = complexMul((a)[19], (float2)(0x1.a9b662p-1f, dir*0x1.1c73b4p-1f)); \\\n"
+    " (a)[20] = complexMul((a)[20], (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f)); \\\n"
+    " (a)[21] = complexMul((a)[21], (float2)(0x1.1c73b4p-1f, dir*0x1.a9b662p-1f)); \\\n"
+    " (a)[22] = complexMul((a)[22], (float2)(0x1.87de2ap-2f, dir*0x1.d906bcp-1f)); \\\n"
+    " (a)[23] = complexMul((a)[23], (float2)(0x1.8f8b84p-3f, dir*0x1.f6297cp-1f)); \\\n"
+    " (a)[24] = complexMul((a)[24], (float2)(0x0p+0f, dir*0x1p+0f)); \\\n"
+    " (a)[25] = complexMul((a)[25], (float2)(-0x1.8f8b84p-3f, dir*0x1.f6297cp-1f)); \\\n"
+    " (a)[26] = complexMul((a)[26], (float2)(-0x1.87de2ap-2f, dir*0x1.d906bcp-1f)); \\\n"
+    " (a)[27] = complexMul((a)[27], (float2)(-0x1.1c73b4p-1f, dir*0x1.a9b662p-1f)); \\\n"
+    " (a)[28] = complexMul((a)[28], (float2)(-0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f)); \\\n"
+    " (a)[29] = complexMul((a)[29], (float2)(-0x1.a9b662p-1f, dir*0x1.1c73b4p-1f)); \\\n"
+    " (a)[30] = complexMul((a)[30], (float2)(-0x1.d906bcp-1f, dir*0x1.87de2ap-2f)); \\\n"
+    " (a)[31] = complexMul((a)[31], (float2)(-0x1.f6297cp-1f, dir*0x1.8f8b84p-3f)); \\\n"
+    " fftKernel16((a), dir); \\\n"
+    " fftKernel16((a) + 16, dir); \\\n"
+    " bitreverse32((a)); \\\n"
+    "}\n\n"
+    );
+
+// OpenCL C source of the twist kernel for interleaved data. Work-item i
+// (get_global_id(0)) handles column i and does nothing when i >= numCols. For
+// each of numRowsToProcess rows it multiplies in[i + j*numCols] in place by
+// (cos(ang), sin(ang)) with ang = 2*pi*dir*i*(startRow + j)/N.
+// Every source line ends in backslash-newline, so the OpenCL preprocessor
+// joins the whole kernel into a single logical line.
+static string twistKernelInterleaved = string(
+    "__kernel void \\\n"
+    "clFFT_1DTwistInterleaved(__global float2 *in, unsigned int startRow, unsigned int numCols, unsigned int N, unsigned int numRowsToProcess, int dir) \\\n"
+    "{ \\\n"
+    " float2 a, w; \\\n"
+    " float ang; \\\n"
+    " unsigned int j; \\\n"
+    " unsigned int i = get_global_id(0); \\\n"
+    " unsigned int startIndex = i; \\\n"
+    " \\\n"
+    " if(i < numCols) \\\n"
+    " { \\\n"
+    " for(j = 0; j < numRowsToProcess; j++) \\\n"
+    " { \\\n"
+    " a = in[startIndex]; \\\n"
+    " ang = 2.0f * M_PI * dir * i * (startRow + j) / N; \\\n"
+    " w = (float2)(native_cos(ang), native_sin(ang)); \\\n"
+    " a = complexMul(a, w); \\\n"
+    " in[startIndex] = a; \\\n"
+    " startIndex += numCols; \\\n"
+    " } \\\n"
+    " } \\\n"
+    "} \\\n"
+    );
+
+// Same twist kernel for split (plannar) data: real and imaginary parts are
+// read from and written back to in_real and in_imag. Note that the kernel
+// entry point is named clFFT_1DTwistSplit, not clFFT_1DTwistPlannar.
+static string twistKernelPlannar = string(
+    "__kernel void \\\n"
+    "clFFT_1DTwistSplit(__global float *in_real, __global float *in_imag , unsigned int startRow, unsigned int numCols, unsigned int N, unsigned int numRowsToProcess, int dir) \\\n"
+    "{ \\\n"
+    " float2 a, w; \\\n"
+    " float ang; \\\n"
+    " unsigned int j; \\\n"
+    " unsigned int i = get_global_id(0); \\\n"
+    " unsigned int startIndex = i; \\\n"
+    " \\\n"
+    " if(i < numCols) \\\n"
+    " { \\\n"
+    " for(j = 0; j < numRowsToProcess; j++) \\\n"
+    " { \\\n"
+    " a = (float2)(in_real[startIndex], in_imag[startIndex]); \\\n"
+    " ang = 2.0f * M_PI * dir * i * (startRow + j) / N; \\\n"
+    " w = (float2)(native_cos(ang), native_sin(ang)); \\\n"
+    " a = complexMul(a, w); \\\n"
+    " in_real[startIndex] = a.x; \\\n"
+    " in_imag[startIndex] = a.y; \\\n"
+    " startIndex += numCols; \\\n"
+    " } \\\n"
+    " } \\\n"
+    "} \\\n"
+    );
+
+
+
+#endif
diff --git a/RTCP/GPUProc/OpenCL_FFT/src/fft_execute.cpp b/RTCP/GPUProc/OpenCL_FFT/src/fft_execute.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..64dacdfbdbcec3e1fa879b9236f0015775570305
--- /dev/null
+++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_execute.cpp
@@ -0,0 +1,405 @@
+
+//
+// File: fft_execute.cpp
+//
+// Version: <1.0>
+//
+// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
+// in consideration of your agreement to the following terms, and your use,
+// installation, modification or redistribution of this Apple software
+// constitutes acceptance of these terms.
If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software.¬ +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#include "fft_internal.h" +#include "clFFT.h" +#include <stdlib.h> +#include <stdio.h> +#include <math.h> + +#define max(a,b) (((a)>(b)) ? (a) : (b)) +#define min(a,b) (((a)<(b)) ? (a) : (b)) + +static cl_int +allocateTemporaryBufferInterleaved(cl_fft_plan *plan, cl_uint batchSize) +{ + cl_int err = CL_SUCCESS; + if(plan->temp_buffer_needed && plan->last_batch_size != batchSize) + { + plan->last_batch_size = batchSize; + size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * 2 * sizeof(cl_float); + + if(plan->tempmemobj) + clReleaseMemObject(plan->tempmemobj); + + plan->tempmemobj = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &err); + } + return err; +} + +static cl_int +allocateTemporaryBufferPlannar(cl_fft_plan *plan, cl_uint batchSize) +{ + cl_int err = CL_SUCCESS; + cl_int terr; + if(plan->temp_buffer_needed && plan->last_batch_size != batchSize) + { + plan->last_batch_size = batchSize; + size_t tmpLength = plan->n.x * plan->n.y * plan->n.z * batchSize * sizeof(cl_float); + + if(plan->tempmemobj_real) + clReleaseMemObject(plan->tempmemobj_real); + + if(plan->tempmemobj_imag) + clReleaseMemObject(plan->tempmemobj_imag); + + plan->tempmemobj_real = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, 
&err); + plan->tempmemobj_imag = clCreateBuffer(plan->context, CL_MEM_READ_WRITE, tmpLength, NULL, &terr); + err |= terr; + } + return err; +} + +void +getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_int *batchSize, size_t *gWorkItems, size_t *lWorkItems) +{ + *lWorkItems = kernelInfo->num_workitems_per_workgroup; + int numWorkGroups = kernelInfo->num_workgroups; + int numXFormsPerWG = kernelInfo->num_xforms_per_workgroup; + + switch(kernelInfo->dir) + { + case cl_fft_kernel_x: + *batchSize *= (plan->n.y * plan->n.z); + numWorkGroups = (*batchSize % numXFormsPerWG) ? (*batchSize/numXFormsPerWG + 1) : (*batchSize/numXFormsPerWG); + numWorkGroups *= kernelInfo->num_workgroups; + break; + case cl_fft_kernel_y: + *batchSize *= plan->n.z; + numWorkGroups *= *batchSize; + break; + case cl_fft_kernel_z: + numWorkGroups *= *batchSize; + break; + } + + *gWorkItems = numWorkGroups * *lWorkItems; +} + +cl_int +clFFT_ExecuteInterleaved( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in, cl_mem data_out, + cl_int num_events, cl_event *event_list, cl_event *event ) +{ + int s; + cl_fft_plan *plan = (cl_fft_plan *) Plan; + if(plan->format != clFFT_InterleavedComplexFormat) + return CL_INVALID_VALUE; + + cl_int err; + size_t gWorkItems, lWorkItems; + int inPlaceDone; + + cl_int isInPlace = data_in == data_out ? 
1 : 0; + + if((err = allocateTemporaryBufferInterleaved(plan, batchSize)) != CL_SUCCESS) + return err; + + cl_mem memObj[3]; + memObj[0] = data_in; + memObj[1] = data_out; + memObj[2] = plan->tempmemobj; + cl_fft_kernel_info *kernelInfo = plan->kernel_info; + int numKernels = plan->num_kernels; + + int numKernelsOdd = numKernels & 1; + int currRead = 0; + int currWrite = 1; + + // at least one external dram shuffle (transpose) required + if(plan->temp_buffer_needed) + { + // in-place transform + if(isInPlace) + { + inPlaceDone = 0; + currRead = 1; + currWrite = 2; + } + else + { + currWrite = (numKernels & 1) ? 1 : 2; + } + + while(kernelInfo) + { + if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible) + { + currWrite = currRead; + inPlaceDone = 1; + } + + s = batchSize; + getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems); + err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); + err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); + + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) + return err; + + currRead = (currWrite == 1) ? 1 : 2; + currWrite = (currWrite == 1) ? 2 : 1; + + kernelInfo = kernelInfo->next; + } + } + // no dram shuffle (transpose required) transform + // all kernels can execute in-place. 
+ else { + + while(kernelInfo) + { + s = batchSize; + getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems); + err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_int), &dir); + err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_int), &s); + + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) + return err; + + currRead = 1; + currWrite = 1; + + kernelInfo = kernelInfo->next; + } + } + + return err; +} + +cl_int +clFFT_ExecutePlannar( cl_command_queue queue, clFFT_Plan Plan, cl_int batchSize, clFFT_Direction dir, + cl_mem data_in_real, cl_mem data_in_imag, cl_mem data_out_real, cl_mem data_out_imag, + cl_int num_events, cl_event *event_list, cl_event *event) +{ + int s; + cl_fft_plan *plan = (cl_fft_plan *) Plan; + + if(plan->format != clFFT_SplitComplexFormat) + return CL_INVALID_VALUE; + + cl_int err; + size_t gWorkItems, lWorkItems; + int inPlaceDone; + + cl_int isInPlace = ((data_in_real == data_out_real) && (data_in_imag == data_out_imag)) ? 
1 : 0; + + if((err = allocateTemporaryBufferPlannar(plan, batchSize)) != CL_SUCCESS) + return err; + + cl_mem memObj_real[3]; + cl_mem memObj_imag[3]; + memObj_real[0] = data_in_real; + memObj_real[1] = data_out_real; + memObj_real[2] = plan->tempmemobj_real; + memObj_imag[0] = data_in_imag; + memObj_imag[1] = data_out_imag; + memObj_imag[2] = plan->tempmemobj_imag; + + cl_fft_kernel_info *kernelInfo = plan->kernel_info; + int numKernels = plan->num_kernels; + + int numKernelsOdd = numKernels & 1; + int currRead = 0; + int currWrite = 1; + + // at least one external dram shuffle (transpose) required + if(plan->temp_buffer_needed) + { + // in-place transform + if(isInPlace) + { + inPlaceDone = 0; + currRead = 1; + currWrite = 2; + } + else + { + currWrite = (numKernels & 1) ? 1 : 2; + } + + while(kernelInfo) + { + if( isInPlace && numKernelsOdd && !inPlaceDone && kernelInfo->in_place_possible) + { + currWrite = currRead; + inPlaceDone = 1; + } + + s = batchSize; + getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems); + err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj_real[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj_imag[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_mem), &memObj_real[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir); + err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s); + + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) + return err; + + currRead = (currWrite == 1) ? 1 : 2; + currWrite = (currWrite == 1) ? 
2 : 1; + + kernelInfo = kernelInfo->next; + } + } + // no dram shuffle (transpose required) transform + else { + + while(kernelInfo) + { + s = batchSize; + getKernelWorkDimensions(plan, kernelInfo, &s, &gWorkItems, &lWorkItems); + err |= clSetKernelArg(kernelInfo->kernel, 0, sizeof(cl_mem), &memObj_real[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 1, sizeof(cl_mem), &memObj_imag[currRead]); + err |= clSetKernelArg(kernelInfo->kernel, 2, sizeof(cl_mem), &memObj_real[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 3, sizeof(cl_mem), &memObj_imag[currWrite]); + err |= clSetKernelArg(kernelInfo->kernel, 4, sizeof(cl_int), &dir); + err |= clSetKernelArg(kernelInfo->kernel, 5, sizeof(cl_int), &s); + + err |= clEnqueueNDRangeKernel(queue, kernelInfo->kernel, 1, NULL, &gWorkItems, &lWorkItems, num_events, event_list, event); + if(err) + return err; + + currRead = 1; + currWrite = 1; + + kernelInfo = kernelInfo->next; + } + } + + return err; +} + +cl_int +clFFT_1DTwistInterleaved(clFFT_Plan Plan, cl_command_queue queue, cl_mem array, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir) +{ + cl_fft_plan *plan = (cl_fft_plan *) Plan; + + unsigned int N = numRows*numCols; + unsigned int nCols = numCols; + unsigned int sRow = startRow; + unsigned int rToProcess = rowsToProcess; + int d = dir; + int err = 0; + + cl_device_id device_id; + err = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device_id, NULL); + if(err) + return err; + + size_t gSize; + err = clGetKernelWorkGroupInfo(plan->twist_kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &gSize, NULL); + if(err) + return err; + + gSize = min(128, gSize); + size_t numGlobalThreads[1] = { max(numCols / gSize, 1)*gSize }; + size_t numLocalThreads[1] = { gSize }; + + err |= clSetKernelArg(plan->twist_kernel, 0, sizeof(cl_mem), &array); + err |= clSetKernelArg(plan->twist_kernel, 1, sizeof(unsigned int), &sRow); + err |= 
clSetKernelArg(plan->twist_kernel, 2, sizeof(unsigned int), &nCols); + err |= clSetKernelArg(plan->twist_kernel, 3, sizeof(unsigned int), &N); + err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &rToProcess); + err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(int), &d); + + err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); + + return err; +} + +cl_int +clFFT_1DTwistPlannar(clFFT_Plan Plan, cl_command_queue queue, cl_mem array_real, cl_mem array_imag, + size_t numRows, size_t numCols, size_t startRow, size_t rowsToProcess, clFFT_Direction dir) +{ + cl_fft_plan *plan = (cl_fft_plan *) Plan; + + unsigned int N = numRows*numCols; + unsigned int nCols = numCols; + unsigned int sRow = startRow; + unsigned int rToProcess = rowsToProcess; + int d = dir; + int err = 0; + + cl_device_id device_id; + err = clGetCommandQueueInfo(queue, CL_QUEUE_DEVICE, sizeof(cl_device_id), &device_id, NULL); + if(err) + return err; + + size_t gSize; + err = clGetKernelWorkGroupInfo(plan->twist_kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &gSize, NULL); + if(err) + return err; + + gSize = min(128, gSize); + size_t numGlobalThreads[1] = { max(numCols / gSize, 1)*gSize }; + size_t numLocalThreads[1] = { gSize }; + + err |= clSetKernelArg(plan->twist_kernel, 0, sizeof(cl_mem), &array_real); + err |= clSetKernelArg(plan->twist_kernel, 1, sizeof(cl_mem), &array_imag); + err |= clSetKernelArg(plan->twist_kernel, 2, sizeof(unsigned int), &sRow); + err |= clSetKernelArg(plan->twist_kernel, 3, sizeof(unsigned int), &nCols); + err |= clSetKernelArg(plan->twist_kernel, 4, sizeof(unsigned int), &N); + err |= clSetKernelArg(plan->twist_kernel, 5, sizeof(unsigned int), &rToProcess); + err |= clSetKernelArg(plan->twist_kernel, 6, sizeof(int), &d); + + err |= clEnqueueNDRangeKernel(queue, plan->twist_kernel, 1, NULL, numGlobalThreads, numLocalThreads, 0, NULL, NULL); + + return err; +} + diff --git 
a/RTCP/GPUProc/OpenCL_FFT/src/fft_internal.h b/RTCP/GPUProc/OpenCL_FFT/src/fft_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..a45b69c98af037b2228aa29b4a046b1c4f1cf86f --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_internal.h @@ -0,0 +1,163 @@ + +// +// File: fft_internal.h +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. 
APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. +// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#ifndef __CLFFT_INTERNAL_H +#define __CLFFT_INTERNAL_H + +#include "clFFT.h" +#include <iostream> +#include <string> +#include <sstream> + +using namespace std; + +typedef enum kernel_dir_t +{ + cl_fft_kernel_x, + cl_fft_kernel_y, + cl_fft_kernel_z +}cl_fft_kernel_dir; + +typedef struct kernel_info_t +{ + cl_kernel kernel; + char *kernel_name; + size_t lmem_size; + size_t num_workgroups; + size_t num_xforms_per_workgroup; + size_t num_workitems_per_workgroup; + cl_fft_kernel_dir dir; + int in_place_possible; + kernel_info_t *next; +}cl_fft_kernel_info; + +typedef struct +{ + // context in which fft resources are created and kernels are executed + cl_context context; + + // size of signal + clFFT_Dim3 n; + + // dimension of transform ... must be either 1D, 2D or 3D + clFFT_Dimension dim; + + // data format ... must be either interleaved or plannar + clFFT_DataFormat format; + + // string containing kernel source. 
Generated at runtime based on + // n, dim, format and other parameters + string *kernel_string; + + // CL program containing source and kernel this particular + // n, dim, data format + cl_program program; + + // linked list of kernels which needs to be executed for this fft + cl_fft_kernel_info *kernel_info; + + // number of kernels + int num_kernels; + + // twist kernel for virtualizing fft of very large sizes that do not + // fit in GPU global memory + cl_kernel twist_kernel; + + // flag indicating if temporary intermediate buffer is needed or not. + // this depends on fft kernels being executed and if transform is + // in-place or out-of-place. e.g. Local memory fft (say 1D 1024 ... + // one that does not require global transpose do not need temporary buffer) + // 2D 1024x1024 out-of-place fft however do require intermediate buffer. + // If temp buffer is needed, its allocation is lazy i.e. its not allocated + // until its needed + cl_int temp_buffer_needed; + + // Batch size is runtime parameter and size of temporary buffer (if needed) + // depends on batch size. Allocation of temporary buffer is lazy i.e. its + // only created when needed. Once its created at first call of clFFT_Executexxx + // it is not allocated next time if next time clFFT_Executexxx is called with + // batch size different than the first call. last_batch_size caches the last + // batch size with which this plan is used so that we dont keep allocating/deallocating + // temp buffer if same batch size is used again and again. + size_t last_batch_size; + + // temporary buffer for interleaved plan + cl_mem tempmemobj; + + // temporary buffer for planner plan. Only one of tempmemobj or + // (tempmemobj_real, tempmemobj_imag) pair is valid (allocated) depending + // data format of plan (plannar or interleaved) + cl_mem tempmemobj_real, tempmemobj_imag; + + // Maximum size of signal for which local memory transposed based + // fft is sufficient i.e. 
no global mem transpose (communication) + // is needed + size_t max_localmem_fft_size; + + // Maximum work items per work group allowed. This, along with max_radix below controls + // maximum local memory being used by fft kernels of this plan. Set to 256 by default + size_t max_work_item_per_workgroup; + + // Maximum base radix for local memory fft ... this controls the maximum register + // space used by work items. Currently defaults to 16 + size_t max_radix; + + // Device depended parameter that tells how many work-items need to be read consecutive + // values to make sure global memory access by work-items of a work-group result in + // coalesced memory access to utilize full bandwidth e.g. on NVidia tesla, this is 16 + size_t min_mem_coalesce_width; + + // Number of local memory banks. This is used to geneate kernel with local memory + // transposes with appropriate padding to avoid bank conflicts to local memory + // e.g. on NVidia it is 16. + size_t num_local_mem_banks; +}cl_fft_plan; + +void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir); + +#endif diff --git a/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp b/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp new file mode 100644 index 0000000000000000000000000000000000000000..71a7633a45a00a1b21c02363c0adace90cdc8c71 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp @@ -0,0 +1,1256 @@ + +// +// File: fft_kernelstring.cpp +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. 
+// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <iostream> +#include <sstream> +#include <cstring> +#include <cassert> +#include "fft_internal.h" +#include "clFFT.h" + +using namespace std; + +#define max(A,B) ((A) > (B) ? (A) : (B)) +#define min(A,B) ((A) < (B) ? (A) : (B)) + +static string +num2str(int num) // Formats num as decimal text ("%d") in a 200-byte scratch buffer and returns it as a string. +{ + char temp[200]; + sprintf(temp, "%d", num); + return string(temp); +} + +// For any n, this function decomposes n into factors for local memory transpose +// based fft. Factors (radices) are sorted such that the first one (radixArray[0]) +// is the largest. This base radix determines the number of registers used by each +// work item and product of remaining radices determine the size of work group needed. +// To make things concrete with an example, suppose n = 1024. It is decomposed into +// 1024 = 16 x 16 x 4. Hence kernel uses float2 a[16], for local in-register fft and +// needs 16 x 4 = 64 work items per work group. So kernel first performs 64 length +// 16 ffts (64 work items working in parallel) followed by transpose using local +// memory followed by again 64 length 16 ffts followed by transpose using local memory +// followed by 256 length 4 ffts.
For the last step, since the size of the work group is +// 64 and each work item can hold an array of 16 values, 64 work items can compute 256 length +// 4 ffts by each work item computing 4 length 4 ffts. +// Similarly for n = 2048 = 8 x 8 x 8 x 4, each work group has 8 x 8 x 4 = 256 work +// items which compute 256 (in-parallel) length 8 ffts in-register, followed +// by transpose using local memory, followed by 256 length 8 in-register ffts, followed +// by transpose using local memory, followed by 256 length 8 in-register ffts, followed +// by transpose using local memory, followed by 512 length 4 in-register ffts. Again, +// for the last step, each work item computes two length 4 in-register ffts and thus +// 256 work items are needed to compute all 512 ffts. +// For n = 32 = 8 x 4, 4 work items first compute 4 in-register +// length 8 ffts, followed by transpose using local memory followed by 8 in-register +// length 4 ffts, where each work item computes two length 4 ffts thus 4 work items +// can compute 8 length 4 ffts. However if work group size of say 64 is chosen, +// each work group can compute 64 / 4 = 16 size 32 ffts (batched transform). +// Users can play with these parameters to figure out what gives best performance on +// their particular device i.e. some devices have less register space thus using +// smaller base radix can avoid spilling ...
some has small local memory thus +// using smaller work group size may be required etc + +static void +getRadixArray(unsigned int n, unsigned int *radixArray, unsigned int *numRadices, unsigned int maxRadix) +{ + if(maxRadix > 1) + { + maxRadix = min(n, maxRadix); + unsigned int cnt = 0; + while(n > maxRadix) + { + radixArray[cnt++] = maxRadix; + n /= maxRadix; + } + radixArray[cnt++] = n; + *numRadices = cnt; + return; + } + + switch(n) + { + case 2: + *numRadices = 1; + radixArray[0] = 2; + break; + + case 4: + *numRadices = 1; + radixArray[0] = 4; + break; + + case 8: + *numRadices = 1; + radixArray[0] = 8; + break; + + case 16: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 2; + break; + + case 32: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 4; + break; + + case 64: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 8; + break; + + case 128: + *numRadices = 3; + radixArray[0] = 8; radixArray[1] = 4; radixArray[2] = 4; + break; + + case 256: + *numRadices = 4; + radixArray[0] = 4; radixArray[1] = 4; radixArray[2] = 4; radixArray[3] = 4; + break; + + case 512: + *numRadices = 3; + radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8; + break; + + case 1024: + *numRadices = 3; + radixArray[0] = 16; radixArray[1] = 16; radixArray[2] = 4; + break; + case 2048: + *numRadices = 4; + radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8; radixArray[3] = 4; + break; + default: + *numRadices = 0; + return; + } +} + +static void +insertHeader(string &kernelString, string &kernelName, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_SplitComplexFormat) + kernelString += string("__kernel void ") + kernelName + string("(__global float *in_real, __global float *in_imag, __global float *out_real, __global float *out_imag, int dir, int S)\n"); + else + kernelString += string("__kernel void ") + kernelName + string("(__global float2 *in, __global float2 *out, int dir, int S)\n"); +} + +static void +insertVariables(string &kStream, int 
maxRadix) +{ + kStream += string(" int i, j, r, indexIn, indexOut, index, tid, bNum, xNum, k, l;\n"); + kStream += string(" int s, ii, jj, offset;\n"); + kStream += string(" float2 w;\n"); + kStream += string(" float ang, angf, ang1;\n"); + kStream += string(" __local float *lMemStore, *lMemLoad;\n"); + kStream += string(" float2 a[") + num2str(maxRadix) + string("];\n"); + kStream += string(" int lId = get_local_id( 0 );\n"); + kStream += string(" int groupId = get_group_id( 0 );\n"); +} + +static void +formattedLoad(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_InterleavedComplexFormat) + kernelString += string(" a[") + num2str(aIndex) + string("] = in[") + num2str(gIndex) + string("];\n"); + else + { + kernelString += string(" a[") + num2str(aIndex) + string("].x = in_real[") + num2str(gIndex) + string("];\n"); + kernelString += string(" a[") + num2str(aIndex) + string("].y = in_imag[") + num2str(gIndex) + string("];\n"); + } +} + +static void +formattedStore(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_InterleavedComplexFormat) + kernelString += string(" out[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("];\n"); + else + { + kernelString += string(" out_real[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].x;\n"); + kernelString += string(" out_imag[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].y;\n"); + } +} + +static int +insertGlobalLoadsAndTranspose(string &kernelString, int N, int numWorkItemsPerXForm, int numXFormsPerWG, int R0, int mem_coalesce_width, clFFT_DataFormat dataFormat) +{ + int log2NumWorkItemsPerXForm = (int) log2(numWorkItemsPerXForm); + int groupSize = numWorkItemsPerXForm * numXFormsPerWG; + int i, j; + int lMemSize = 0; + + if(numXFormsPerWG > 1) + kernelString += string(" s = S & ") + num2str(numXFormsPerWG - 1) + string(";\n"); + + if(numWorkItemsPerXForm >= 
mem_coalesce_width) + { + if(numXFormsPerWG > 1) + { + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm-1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n"); + kernelString += string(" offset = mad24( mad24(groupId, ") + num2str(numXFormsPerWG) + string(", jj), ") + num2str(N) + string(", ii );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + for(i = 0; i < R0; i++) + formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat); + kernelString += string(" }\n"); + } + else + { + kernelString += string(" ii = lId;\n"); + kernelString += string(" jj = 0;\n"); + kernelString += string(" offset = mad24(groupId, ") + num2str(N) + string(", ii);\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + for(i = 0; i < R0; i++) + formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat); + } + } + else if( N >= mem_coalesce_width ) + { + int numInnerIter = N / mem_coalesce_width; + int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width ); + + kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n"); + 
kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + kernelString += string(" offset = mad24( groupId, ") + num2str(numXFormsPerWG) + string(", jj);\n"); + kernelString += string(" offset = mad24( offset, ") + num2str(N) + string(", ii );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + kernelString += string(" if( jj < s ) {\n"); + for(j = 0; j < numInnerIter; j++ ) + formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat); + kernelString += string(" }\n"); + if(i != numOuterIter - 1) + kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n"); + } + kernelString += string("}\n "); + kernelString += string("else {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + for(j = 0; j < numInnerIter; j++ ) + formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat); + } + kernelString += string("}\n"); + + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii);\n"); + + for( i = 0; i < numOuterIter; i++ ) + { + for( j = 0; j < numInnerIter; j++ ) + { + kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / 
mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") + + num2str(i * numInnerIter + j) + string("].x;\n"); + } + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + { + for( j = 0; j < numInnerIter; j++ ) + { + kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") + + num2str(i * numInnerIter + j) + string("].y;\n"); + } + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + else + { + kernelString += string(" offset = mad24( groupId, ") + num2str(N * numXFormsPerWG) + string(", lId );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + + kernelString += string(" ii = lId & ") + num2str(N-1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(N)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for( i = 0; i < R0; i++ ) + { + kernelString += string(" if(jj < s 
)\n"); + formattedLoad(kernelString, i, i*groupSize, dataFormat); + if(i != R0 - 1) + kernelString += string(" jj += ") + num2str(groupSize / N) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for( i = 0; i < R0; i++ ) + { + formattedLoad(kernelString, i, i*groupSize, dataFormat); + } + kernelString += string("}\n"); + + if(numWorkItemsPerXForm > 1) + { + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + } + else + { + kernelString += string(" ii = 0;\n"); + kernelString += string(" jj = lId;\n"); + kernelString += string(" lMemLoad = sMem + jj * ") + num2str(N + numWorkItemsPerXForm) + string(";\n"); + } + + + for( i = 0; i < R0; i++ ) + kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].x;\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].y;\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + + return lMemSize; +} + +static int 
+insertGlobalStoresAndTranspose(string &kernelString, int N, int maxRadix, int Nr, int numWorkItemsPerXForm, int numXFormsPerWG, int mem_coalesce_width, clFFT_DataFormat dataFormat) +{ + int groupSize = numWorkItemsPerXForm * numXFormsPerWG; + int i, j, k, ind; + int lMemSize = 0; + int numIter = maxRadix / Nr; + string indent = string(""); + + if( numWorkItemsPerXForm >= mem_coalesce_width ) + { + if(numXFormsPerWG > 1) + { + kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n"); + indent = string(" "); + } + for(i = 0; i < maxRadix; i++) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + formattedStore(kernelString, ind, i*numWorkItemsPerXForm, dataFormat); + } + if(numXFormsPerWG > 1) + kernelString += string(" }\n"); + } + else if( N >= mem_coalesce_width ) + { + int numInnerIter = N / mem_coalesce_width; + int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width ); + + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + for( j = 0; j < numInnerIter; j++ ) + kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].x = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n"); + kernelString += string(" barrier( 
CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + for( j = 0; j < numInnerIter; j++ ) + kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].y = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + kernelString += string(" if( jj < s ) {\n"); + for(j = 0; j < numInnerIter; j++ ) + formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat); + kernelString += string(" }\n"); + if(i != numOuterIter - 1) + kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + for(j = 0; j < numInnerIter; j++ ) + formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat); + } + kernelString += string("}\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + else + { + kernelString += string(" lMemLoad = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + kernelString += string(" ii = lId & ") + num2str(N - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int) log2(N)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k 
= i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for( i = 0; i < maxRadix; i++ ) + { + kernelString += string(" if(jj < s ) {\n"); + formattedStore(kernelString, i, i*groupSize, dataFormat); + kernelString += string(" }\n"); + if( i != maxRadix - 1) + kernelString += string(" jj +=") + num2str(groupSize / N) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for( i = 0; i < maxRadix; i++ ) + { + formattedStore(kernelString, i, i*groupSize, dataFormat); + } + kernelString += string("}\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + + return lMemSize; +} + +static void +insertfftKernel(string &kernelString, int Nr, int numIter) +{ + int i; + for(i = 0; i < numIter; i++) + { + kernelString += string(" fftKernel") + num2str(Nr) + string("(a+") + num2str(i*Nr) + string(", dir);\n"); + } +} + +static void +insertTwiddleKernel(string &kernelString, int Nr, int numIter, 
int Nprev, int len, int numWorkItemsPerXForm) +{ + int z, k; + int logNPrev = log2(Nprev); + + for(z = 0; z < numIter; z++) + { + if(z == 0) + { + if(Nprev > 1) + kernelString += string(" angf = (float) (ii >> ") + num2str(logNPrev) + string(");\n"); + else + kernelString += string(" angf = (float) ii;\n"); + } + else + { + if(Nprev > 1) + kernelString += string(" angf = (float) ((") + num2str(z*numWorkItemsPerXForm) + string(" + ii) >>") + num2str(logNPrev) + string(");\n"); + else + kernelString += string(" angf = (float) (") + num2str(z*numWorkItemsPerXForm) + string(" + ii);\n"); + } + + for(k = 1; k < Nr; k++) { + int ind = z*Nr + k; + //float fac = (float) (2.0 * M_PI * (double) k / (double) len); + kernelString += string(" ang = dir * ( 2.0f * M_PI * ") + num2str(k) + string(".0f / ") + num2str(len) + string(".0f )") + string(" * angf;\n"); + kernelString += string(" w = (float2)(native_cos(ang), native_sin(ang));\n"); + kernelString += string(" a[") + num2str(ind) + string("] = complexMul(a[") + num2str(ind) + string("], w);\n"); + } + } +} + +static int +getPadding(int numWorkItemsPerXForm, int Nprev, int numWorkItemsReq, int numXFormsPerWG, int Nr, int numBanks, int *offset, int *midPad) +{ + if((numWorkItemsPerXForm <= Nprev) || (Nprev >= numBanks)) + *offset = 0; + else { + int numRowsReq = ((numWorkItemsPerXForm < numBanks) ? 
numWorkItemsPerXForm : numBanks) / Nprev; + int numColsReq = 1; + if(numRowsReq > Nr) + numColsReq = numRowsReq / Nr; + numColsReq = Nprev * numColsReq; + *offset = numColsReq; + } + + if(numWorkItemsPerXForm >= numBanks || numXFormsPerWG == 1) + *midPad = 0; + else { + int bankNum = ( (numWorkItemsReq + *offset) * Nr ) & (numBanks - 1); + if( bankNum >= numWorkItemsPerXForm ) + *midPad = 0; + else + *midPad = numWorkItemsPerXForm - bankNum; + } + + int lMemSize = ( numWorkItemsReq + *offset) * Nr * numXFormsPerWG + *midPad * (numXFormsPerWG - 1); + return lMemSize; +} + + +static void +insertLocalStores(string &kernelString, int numIter, int Nr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp) +{ + int z, k; + + for(z = 0; z < numIter; z++) { + for(k = 0; k < Nr; k++) { + int index = k*(numWorkItemsReq + offset) + z*numWorkItemsPerXForm; + kernelString += string(" lMemStore[") + num2str(index) + string("] = a[") + num2str(z*Nr + k) + string("].") + comp + string(";\n"); + } + } + kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); +} + +static void +insertLocalLoads(string &kernelString, int n, int Nr, int Nrn, int Nprev, int Ncurr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp) +{ + int numWorkItemsReqN = n / Nrn; + int interBlockHNum = max( Nprev / numWorkItemsPerXForm, 1 ); + int interBlockHStride = numWorkItemsPerXForm; + int vertWidth = max(numWorkItemsPerXForm / Nprev, 1); + vertWidth = min( vertWidth, Nr); + int vertNum = Nr / vertWidth; + int vertStride = ( n / Nr + offset ) * vertWidth; + int iter = max( numWorkItemsReqN / numWorkItemsPerXForm, 1); + int intraBlockHStride = (numWorkItemsPerXForm / (Nprev*Nr)) > 1 ? 
(numWorkItemsPerXForm / (Nprev*Nr)) : 1; + intraBlockHStride *= Nprev; + + int stride = numWorkItemsReq / Nrn; + int i; + for(i = 0; i < iter; i++) { + int ii = i / (interBlockHNum * vertNum); + int zz = i % (interBlockHNum * vertNum); + int jj = zz % interBlockHNum; + int kk = zz / interBlockHNum; + int z; + for(z = 0; z < Nrn; z++) { + int st = kk * vertStride + jj * interBlockHStride + ii * intraBlockHStride + z * stride; + kernelString += string(" a[") + num2str(i*Nrn + z) + string("].") + comp + string(" = lMemLoad[") + num2str(st) + string("];\n"); + } + } + kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); +} + +static void +insertLocalLoadIndexArithmatic(string &kernelString, int Nprev, int Nr, int numWorkItemsReq, int numWorkItemsPerXForm, int numXFormsPerWG, int offset, int midPad) +{ + int Ncurr = Nprev * Nr; + int logNcurr = log2(Ncurr); + int logNprev = log2(Nprev); + int incr = (numWorkItemsReq + offset) * Nr + midPad; + + if(Ncurr < numWorkItemsPerXForm) + { + if(Nprev == 1) + kernelString += string(" j = ii & ") + num2str(Ncurr - 1) + string(";\n"); + else + kernelString += string(" j = (ii & ") + num2str(Ncurr - 1) + string(") >> ") + num2str(logNprev) + string(";\n"); + + if(Nprev == 1) + kernelString += string(" i = ii >> ") + num2str(logNcurr) + string(";\n"); + else + kernelString += string(" i = mad24(ii >> ") + num2str(logNcurr) + string(", ") + num2str(Nprev) + string(", ii & ") + num2str(Nprev - 1) + string(");\n"); + } + else + { + if(Nprev == 1) + kernelString += string(" j = ii;\n"); + else + kernelString += string(" j = ii >> ") + num2str(logNprev) + string(";\n"); + if(Nprev == 1) + kernelString += string(" i = 0;\n"); + else + kernelString += string(" i = ii & ") + num2str(Nprev - 1) + string(";\n"); + } + + if(numXFormsPerWG > 1) + kernelString += string(" i = mad24(jj, ") + num2str(incr) + string(", i);\n"); + + kernelString += string(" lMemLoad = sMem + mad24(j, ") + num2str(numWorkItemsReq + offset) + string(", i);\n"); 
+} + +static void +insertLocalStoreIndexArithmatic(string &kernelString, int numWorkItemsReq, int numXFormsPerWG, int Nr, int offset, int midPad) +{ + if(numXFormsPerWG == 1) { + kernelString += string(" lMemStore = sMem + ii;\n"); + } + else { + kernelString += string(" lMemStore = sMem + mad24(jj, ") + num2str((numWorkItemsReq + offset)*Nr + midPad) + string(", ii);\n"); + } +} + + +static void +createLocalMemfftKernelString(cl_fft_plan *plan) +{ + unsigned int radixArray[10]; + unsigned int numRadix; + + unsigned int n = plan->n.x; + + assert(n <= plan->max_work_item_per_workgroup * plan->max_radix && "signal lenght too big for local mem fft\n"); + + getRadixArray(n, radixArray, &numRadix, 0); + assert(numRadix > 0 && "no radix array supplied\n"); + + if(n/radixArray[0] > plan->max_work_item_per_workgroup) + getRadixArray(n, radixArray, &numRadix, plan->max_radix); + + assert(radixArray[0] <= plan->max_radix && "max radix choosen is greater than allowed\n"); + assert(n/radixArray[0] <= plan->max_work_item_per_workgroup && "required work items per xform greater than maximum work items allowed per work group for local mem fft\n"); + + unsigned int tmpLen = 1; + unsigned int i; + for(i = 0; i < numRadix; i++) + { + assert( radixArray[i] && !( (radixArray[i] - 1) & radixArray[i] ) ); + tmpLen *= radixArray[i]; + } + assert(tmpLen == n && "product of radices choosen doesnt match the length of signal\n"); + + int offset, midPad; + string localString(""), kernelName(""); + + clFFT_DataFormat dataFormat = plan->format; + string *kernelString = plan->kernel_string; + + + cl_fft_kernel_info **kInfo = &plan->kernel_info; + int kCount = 0; + + while(*kInfo) + { + kInfo = &(*kInfo)->next; + kCount++; + } + + kernelName = string("fft") + num2str(kCount); + + *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info)); + (*kInfo)->kernel = 0; + (*kInfo)->lmem_size = 0; + (*kInfo)->num_workgroups = 0; + (*kInfo)->num_workitems_per_workgroup = 0; + (*kInfo)->dir = 
cl_fft_kernel_x; + (*kInfo)->in_place_possible = 1; + (*kInfo)->next = NULL; + (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1)); + strcpy((*kInfo)->kernel_name, kernelName.c_str()); + + unsigned int numWorkItemsPerXForm = n / radixArray[0]; + unsigned int numWorkItemsPerWG = numWorkItemsPerXForm <= 64 ? 64 : numWorkItemsPerXForm; + assert(numWorkItemsPerWG <= plan->max_work_item_per_workgroup); + int numXFormsPerWG = numWorkItemsPerWG / numWorkItemsPerXForm; + (*kInfo)->num_workgroups = 1; + (*kInfo)->num_xforms_per_workgroup = numXFormsPerWG; + (*kInfo)->num_workitems_per_workgroup = numWorkItemsPerWG; + + unsigned int *N = radixArray; + unsigned int maxRadix = N[0]; + unsigned int lMemSize = 0; + + insertVariables(localString, maxRadix); + + lMemSize = insertGlobalLoadsAndTranspose(localString, n, numWorkItemsPerXForm, numXFormsPerWG, maxRadix, plan->min_mem_coalesce_width, dataFormat); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size; + + string xcomp = string("x"); + string ycomp = string("y"); + + unsigned int Nprev = 1; + unsigned int len = n; + unsigned int r; + for(r = 0; r < numRadix; r++) + { + int numIter = N[0] / N[r]; + int numWorkItemsReq = n / N[r]; + int Ncurr = Nprev * N[r]; + insertfftKernel(localString, N[r], numIter); + + if(r < (numRadix - 1)) { + insertTwiddleKernel(localString, N[r], numIter, Nprev, len, numWorkItemsPerXForm); + lMemSize = getPadding(numWorkItemsPerXForm, Nprev, numWorkItemsReq, numXFormsPerWG, N[r], plan->num_local_mem_banks, &offset, &midPad); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? 
lMemSize : (*kInfo)->lmem_size; + insertLocalStoreIndexArithmatic(localString, numWorkItemsReq, numXFormsPerWG, N[r], offset, midPad); + insertLocalLoadIndexArithmatic(localString, Nprev, N[r], numWorkItemsReq, numWorkItemsPerXForm, numXFormsPerWG, offset, midPad); + insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp); + insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp); + insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp); + insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp); + Nprev = Ncurr; + len = len / N[r]; + } + } + + lMemSize = insertGlobalStoresAndTranspose(localString, n, maxRadix, N[numRadix - 1], numWorkItemsPerXForm, numXFormsPerWG, plan->min_mem_coalesce_width, dataFormat); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size; + + insertHeader(*kernelString, kernelName, dataFormat); + *kernelString += string("{\n"); + if((*kInfo)->lmem_size) + *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n"); + *kernelString += localString; + *kernelString += string("}\n"); +} + +// For n larger than what can be computed using local memory fft, global transposes +// multiple kernel launces is needed. For these sizes, n can be decomposed using +// much larger base radices i.e. say n = 262144 = 128 x 64 x 32. Thus three kernel +// launches will be needed, first computing 64 x 32, length 128 ffts, second computing +// 128 x 32 length 64 ffts, and finally a kernel computing 128 x 64 length 32 ffts. 
+// Each of these base radices can further be divided into factors so that each of these +// base ffts can be computed within one kernel launch using in-register ffts and local +// memory transposes i.e. for the first kernel above which computes 64 x 32 ffts of length +// 128, 128 can be decomposed into 128 = 16 x 8 i.e. 8 work items can compute 8 length +// 16 ffts followed by transpose using local memory followed by each of these eight +// work items computing 2 length 8 ffts thus computing 16 length 8 ffts in total. This +// means only 8 work items are needed for computing one length 128 fft. If we choose +// work group size of say 64, we can compute 64/8 = 8 length 128 ffts within one +// work group. Since we need to compute 64 x 32 length 128 ffts in first kernel, this +// means we need to launch 64 x 32 / 8 = 256 work groups with 64 work items in each +// work group where each work group is computing 8 length 128 ffts where each length +// 128 fft is computed by 8 work items. Same logic can be applied to other two kernels +// in this example. Users can play with different base radices and different +// decompositions of base radices to generate different kernels and see which gives +// best performance. 
Following function is just fixed to use 128 as base radix + +void +getGlobalRadixInfo(int n, int *radix, int *R1, int *R2, int *numRadices) +{ + int baseRadix = min(n, 128); + + int numR = 0; + int N = n; + while(N > baseRadix) + { + N /= baseRadix; + numR++; + } + + for(int i = 0; i < numR; i++) + radix[i] = baseRadix; + + radix[numR] = N; + numR++; + *numRadices = numR; + + for(int i = 0; i < numR; i++) + { + int B = radix[i]; + if(B <= 8) + { + R1[i] = B; + R2[i] = 1; + continue; + } + + int r1 = 2; + int r2 = B / r1; + while(r2 > r1) + { + r1 *=2; + r2 = B / r1; + } + R1[i] = r1; + R2[i] = r2; + } +} + +static void +createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir dir, int vertBS) +{ + int i, j, k, t; + int radixArr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int R1Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int R2Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int radix, R1, R2; + int numRadices; + + int maxThreadsPerBlock = plan->max_work_item_per_workgroup; + int maxArrayLen = plan->max_radix; + int batchSize = plan->min_mem_coalesce_width; + clFFT_DataFormat dataFormat = plan->format; + int vertical = (dir == cl_fft_kernel_x) ? 0 : 1; + + getGlobalRadixInfo(n, radixArr, R1Arr, R2Arr, &numRadices); + + int numPasses = numRadices; + + string localString(""), kernelName(""); + string *kernelString = plan->kernel_string; + cl_fft_kernel_info **kInfo = &plan->kernel_info; + int kCount = 0; + + while(*kInfo) + { + kInfo = &(*kInfo)->next; + kCount++; + } + + int N = n; + int m = (int)log2(n); + int Rinit = vertical ? BS : 1; + batchSize = vertical ? 
min(BS, batchSize) : batchSize; + int passNum; + + for(passNum = 0; passNum < numPasses; passNum++) + { + + localString.clear(); + kernelName.clear(); + + radix = radixArr[passNum]; + R1 = R1Arr[passNum]; + R2 = R2Arr[passNum]; + + int strideI = Rinit; + for(i = 0; i < numPasses; i++) + if(i != passNum) + strideI *= radixArr[i]; + + int strideO = Rinit; + for(i = 0; i < passNum; i++) + strideO *= radixArr[i]; + + int threadsPerXForm = R2; + batchSize = R2 == 1 ? plan->max_work_item_per_workgroup : batchSize; + batchSize = min(batchSize, strideI); + int threadsPerBlock = batchSize * threadsPerXForm; + threadsPerBlock = min(threadsPerBlock, maxThreadsPerBlock); + batchSize = threadsPerBlock / threadsPerXForm; + assert(R2 <= R1); + assert(R1*R2 == radix); + assert(R1 <= maxArrayLen); + assert(threadsPerBlock <= maxThreadsPerBlock); + + int numIter = R1 / R2; + int gInInc = threadsPerBlock / batchSize; + + + int lgStrideO = log2(strideO); + int numBlocksPerXForm = strideI / batchSize; + int numBlocks = numBlocksPerXForm; + if(!vertical) + numBlocks *= BS; + else + numBlocks *= vertBS; + + kernelName = string("fft") + num2str(kCount); + *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info)); + (*kInfo)->kernel = 0; + if(R2 == 1) + (*kInfo)->lmem_size = 0; + else + { + if(strideO == 1) + (*kInfo)->lmem_size = (radix + 1)*batchSize; + else + (*kInfo)->lmem_size = threadsPerBlock*R1; + } + (*kInfo)->num_workgroups = numBlocks; + (*kInfo)->num_xforms_per_workgroup = 1; + (*kInfo)->num_workitems_per_workgroup = threadsPerBlock; + (*kInfo)->dir = dir; + if( (passNum == (numPasses - 1)) && (numPasses & 1) ) + (*kInfo)->in_place_possible = 1; + else + (*kInfo)->in_place_possible = 0; + (*kInfo)->next = NULL; + (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1)); + strcpy((*kInfo)->kernel_name, kernelName.c_str()); + + insertVariables(localString, R1); + + if(vertical) + { + localString += string("xNum = groupId >> ") + 
num2str((int)log2(numBlocksPerXForm)) + string(";\n"); + localString += string("groupId = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n"); + localString += string("indexIn = mad24(groupId, ") + num2str(batchSize) + string(", xNum << ") + num2str((int)log2(n*BS)) + string(");\n"); + localString += string("tid = groupId * ") + num2str(batchSize) + string(";\n"); + localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n"); + int stride = radix*Rinit; + for(i = 0; i < passNum; i++) + stride *= radixArr[i]; + localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j + ") + string("(xNum << ") + num2str((int) log2(n*BS)) + string("));\n"); + localString += string("bNum = groupId;\n"); + } + else + { + int lgNumBlocksPerXForm = log2(numBlocksPerXForm); + localString += string("bNum = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n"); + localString += string("xNum = groupId >> ") + num2str(lgNumBlocksPerXForm) + string(";\n"); + localString += string("indexIn = bNum * ") + num2str(batchSize) + string(";\n"); + localString += string("tid = indexIn;\n"); + localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n"); + int stride = radix*Rinit; + for(i = 0; i < passNum; i++) + stride *= radixArr[i]; + localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j);\n"); + localString += string("indexIn += (xNum << ") + num2str(m) + string(");\n"); + localString += string("indexOut += (xNum << ") + num2str(m) + string(");\n"); + } + + // Load Data + int lgBatchSize = log2(batchSize); + localString += string("tid = lId;\n"); + localString += string("i = tid & ") + num2str(batchSize - 1) + string(";\n"); + localString += string("j = tid >> ") + num2str(lgBatchSize) + string(";\n"); + localString += string("indexIn += mad24(j, ") + 
num2str(strideI) + string(", i);\n"); + + if(dataFormat == clFFT_SplitComplexFormat) + { + localString += string("in_real += indexIn;\n"); + localString += string("in_imag += indexIn;\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("].x = in_real[") + num2str(j*gInInc*strideI) + string("];\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("].y = in_imag[") + num2str(j*gInInc*strideI) + string("];\n"); + } + else + { + localString += string("in += indexIn;\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("] = in[") + num2str(j*gInInc*strideI) + string("];\n"); + } + + localString += string("fftKernel") + num2str(R1) + string("(a, dir);\n"); + + if(R2 > 1) + { + // twiddle + for(k = 1; k < R1; k++) + { + localString += string("ang = dir*(2.0f*M_PI*") + num2str(k) + string("/") + num2str(radix) + string(")*j;\n"); + localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n"); + localString += string("a[") + num2str(k) + string("] = complexMul(a[") + num2str(k) + string("], w);\n"); + } + + // shuffle + numIter = R1 / R2; + localString += string("indexIn = mad24(j, ") + num2str(threadsPerBlock*numIter) + string(", i);\n"); + localString += string("lMemStore = sMem + tid;\n"); + localString += string("lMemLoad = sMem + indexIn;\n"); + for(k = 0; k < R1; k++) + localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < numIter; k++) + for(t = 0; t < R2; t++) + localString += string("a[") + num2str(k*R2+t) + string("].x = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < R1; k++) + localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n"); + localString += 
string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < numIter; k++) + for(t = 0; t < R2; t++) + localString += string("a[") + num2str(k*R2+t) + string("].y = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + for(j = 0; j < numIter; j++) + localString += string("fftKernel") + num2str(R2) + string("(a + ") + num2str(j*R2) + string(", dir);\n"); + } + + // twiddle + if(passNum < (numPasses - 1)) + { + localString += string("l = ((bNum << ") + num2str(lgBatchSize) + string(") + i) >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("k = j << ") + num2str((int)log2(R1/R2)) + string(";\n"); + localString += string("ang1 = dir*(2.0f*M_PI/") + num2str(N) + string(")*l;\n"); + for(t = 0; t < R1; t++) + { + localString += string("ang = ang1*(k + ") + num2str((t%R2)*R1 + (t/R2)) + string(");\n"); + localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n"); + localString += string("a[") + num2str(t) + string("] = complexMul(a[") + num2str(t) + string("], w);\n"); + } + } + + // Store Data + if(strideO == 1) + { + + localString += string("lMemStore = sMem + mad24(i, ") + num2str(radix + 1) + string(", j << ") + num2str((int)log2(R1/R2)) + string(");\n"); + localString += string("lMemLoad = sMem + mad24(tid >> ") + num2str((int)log2(radix)) + string(", ") + num2str(radix+1) + string(", tid & ") + num2str(radix-1) + string(");\n"); + + for(int i = 0; i < R1/R2; i++) + for(int j = 0; j < R2; j++) + localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].x;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + if(threadsPerBlock >= radix) + { + for(int i = 0; i < R1; i++) + localString += string("a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n"); + } + else + { + int innerIter = radix/threadsPerBlock; + int outerIter = R1/innerIter; + 
for(int i = 0; i < outerIter; i++) + for(int j = 0; j < innerIter; j++) + localString += string("a[") + num2str(i*innerIter+j) + string("].x = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n"); + } + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + for(int i = 0; i < R1/R2; i++) + for(int j = 0; j < R2; j++) + localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].y;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + if(threadsPerBlock >= radix) + { + for(int i = 0; i < R1; i++) + localString += string("a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n"); + } + else + { + int innerIter = radix/threadsPerBlock; + int outerIter = R1/innerIter; + for(int i = 0; i < outerIter; i++) + for(int j = 0; j < innerIter; j++) + localString += string("a[") + num2str(i*innerIter+j) + string("].y = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n"); + } + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + localString += string("indexOut += tid;\n"); + if(dataFormat == clFFT_SplitComplexFormat) { + localString += string("out_real += indexOut;\n"); + localString += string("out_imag += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out_real[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n"); + for(k = 0; k < R1; k++) + localString += string("out_imag[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n"); + } + else { + localString += string("out += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("];\n"); + } + + } + else + { + localString += string("indexOut += mad24(j, ") + num2str(numIter*strideO) + string(", i);\n"); + if(dataFormat == clFFT_SplitComplexFormat) { + localString += string("out_real += indexOut;\n"); + 
localString += string("out_imag += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out_real[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].x;\n"); + for(k = 0; k < R1; k++) + localString += string("out_imag[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].y;\n"); + } + else { + localString += string("out += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("];\n"); + } + } + + insertHeader(*kernelString, kernelName, dataFormat); + *kernelString += string("{\n"); + if((*kInfo)->lmem_size) + *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n"); + *kernelString += localString; + *kernelString += string("}\n"); + + N /= radix; + kInfo = &(*kInfo)->next; + kCount++; + } +} + +void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir) +{ + unsigned int radixArray[10]; + unsigned int numRadix; + + switch(dir) + { + case cl_fft_kernel_x: + if(plan->n.x > plan->max_localmem_fft_size) + { + createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1); + } + else if(plan->n.x > 1) + { + getRadixArray(plan->n.x, radixArray, &numRadix, 0); + if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup) + { + createLocalMemfftKernelString(plan); + } + else + { + getRadixArray(plan->n.x, radixArray, &numRadix, plan->max_radix); + if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup) + createLocalMemfftKernelString(plan); + else + createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1); + } + } + break; + + case cl_fft_kernel_y: + if(plan->n.y > 1) + createGlobalFFTKernelString(plan, plan->n.y, plan->n.x, cl_fft_kernel_y, 1); + break; + + case cl_fft_kernel_z: + if(plan->n.z > 1) + createGlobalFFTKernelString(plan, plan->n.z, plan->n.x*plan->n.y, cl_fft_kernel_z, 1); + default: + return; + } +} + diff --git 
a/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp.orig b/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp.orig new file mode 100644 index 0000000000000000000000000000000000000000..bbb9298d921528032397da8a71cdafa5502a421e --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_kernelstring.cpp.orig @@ -0,0 +1,1256 @@ + +// +// File: fft_kernelstring.cpp +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. 
APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. +// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#include <cstdio> +#include <cstdlib> +#include <cmath> +#include <iostream> +#include <sstream> +#include <cstring> +#include <cassert> +#include "fft_internal.h" +#include "clFFT.h" + +using namespace std; + +#define max(A,B) ((A) > (B) ? (A) : (B)) +#define min(A,B) ((A) < (B) ? (A) : (B)) + +static string +num2str(int num) +{ + char temp[200]; + sprintf(temp, "%d", num); + return string(temp); +} + +// For any n, this function decomposes n into factors for loacal memory tranpose +// based fft. Factors (radices) are sorted such that the first one (radixArray[0]) +// is the largest. This base radix determines the number of registers used by each +// work item and product of remaining radices determine the size of work group needed. +// To make things concrete with and example, suppose n = 1024. It is decomposed into +// 1024 = 16 x 16 x 4. Hence kernel uses float2 a[16], for local in-register fft and +// needs 16 x 4 = 64 work items per work group. 
So kernel first performs 64 length +// 16 ffts (64 work items working in parallel) followed by transpose using local +// memory followed by again 64 length 16 ffts followed by transpose using local memory +// followed by 256 length 4 ffts. For the last step, since the size of the work group is +// 64 and each work item can hold an array of 16 values, 64 work items can compute 256 length +// 4 ffts by each work item computing 4 length 4 ffts. +// Similarly for n = 2048 = 8 x 8 x 8 x 4, each work group has 8 x 8 x 4 = 256 work +// items which compute 256 (in-parallel) length 8 ffts in-register, followed +// by transpose using local memory, followed by 256 length 8 in-register ffts, followed +// by transpose using local memory, followed by 256 length 8 in-register ffts, followed +// by transpose using local memory, followed by 512 length 4 in-register ffts. Again, +// for the last step, each work item computes two length 4 in-register ffts and thus +// 256 work items are needed to compute all 512 ffts. +// For n = 32 = 8 x 4, 4 work items first compute 4 in-register +// length 8 ffts, followed by transpose using local memory followed by 8 in-register +// length 4 ffts, where each work item computes two length 4 ffts thus 4 work items +// can compute 8 length 4 ffts. However if work group size of say 64 is chosen, +// each work group can compute 64 / 4 = 16 size 32 ffts (batched transform). +// Users can play with these parameters to figure out what gives best performance on +// their particular device i.e. some devices have less register space thus using +// smaller base radix can avoid spilling ... 
some has small local memory thus +// using smaller work group size may be required etc + +static void +getRadixArray(unsigned int n, unsigned int *radixArray, unsigned int *numRadices, unsigned int maxRadix) +{ + if(maxRadix > 1) + { + maxRadix = min(n, maxRadix); + unsigned int cnt = 0; + while(n > maxRadix) + { + radixArray[cnt++] = maxRadix; + n /= maxRadix; + } + radixArray[cnt++] = n; + *numRadices = cnt; + return; + } + + switch(n) + { + case 2: + *numRadices = 1; + radixArray[0] = 2; + break; + + case 4: + *numRadices = 1; + radixArray[0] = 4; + break; + + case 8: + *numRadices = 1; + radixArray[0] = 8; + break; + + case 16: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 2; + break; + + case 32: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 4; + break; + + case 64: + *numRadices = 2; + radixArray[0] = 8; radixArray[1] = 8; + break; + + case 128: + *numRadices = 3; + radixArray[0] = 8; radixArray[1] = 4; radixArray[2] = 4; + break; + + case 256: + *numRadices = 4; + radixArray[0] = 4; radixArray[1] = 4; radixArray[2] = 4; radixArray[3] = 4; + break; + + case 512: + *numRadices = 3; + radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8; + break; + + case 1024: + *numRadices = 3; + radixArray[0] = 16; radixArray[1] = 16; radixArray[2] = 4; + break; + case 2048: + *numRadices = 4; + radixArray[0] = 8; radixArray[1] = 8; radixArray[2] = 8; radixArray[3] = 4; + break; + default: + *numRadices = 0; + return; + } +} + +static void +insertHeader(string &kernelString, string &kernelName, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_SplitComplexFormat) + kernelString += string("__kernel void ") + kernelName + string("(__global float *in_real, __global float *in_imag, __global float *out_real, __global float *out_imag, int dir, int S)\n"); + else + kernelString += string("__kernel void ") + kernelName + string("(__global float2 *in, __global float2 *out, int dir, int S)\n"); +} + +static void +insertVariables(string &kStream, int 
maxRadix) +{ + kStream += string(" int i, j, r, indexIn, indexOut, index, tid, bNum, xNum, k, l;\n"); + kStream += string(" int s, ii, jj, offset;\n"); + kStream += string(" float2 w;\n"); + kStream += string(" float ang, angf, ang1;\n"); + kStream += string(" __local float *lMemStore, *lMemLoad;\n"); + kStream += string(" float2 a[") + num2str(maxRadix) + string("];\n"); + kStream += string(" int lId = get_local_id( 0 );\n"); + kStream += string(" int groupId = get_group_id( 0 );\n"); +} + +static void +formattedLoad(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_InterleavedComplexFormat) + kernelString += string(" a[") + num2str(aIndex) + string("] = in[") + num2str(gIndex) + string("];\n"); + else + { + kernelString += string(" a[") + num2str(aIndex) + string("].x = in_real[") + num2str(gIndex) + string("];\n"); + kernelString += string(" a[") + num2str(aIndex) + string("].y = in_imag[") + num2str(gIndex) + string("];\n"); + } +} + +static void +formattedStore(string &kernelString, int aIndex, int gIndex, clFFT_DataFormat dataFormat) +{ + if(dataFormat == clFFT_InterleavedComplexFormat) + kernelString += string(" out[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("];\n"); + else + { + kernelString += string(" out_real[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].x;\n"); + kernelString += string(" out_imag[") + num2str(gIndex) + string("] = a[") + num2str(aIndex) + string("].y;\n"); + } +} + +static int +insertGlobalLoadsAndTranspose(string &kernelString, int N, int numWorkItemsPerXForm, int numXFormsPerWG, int R0, int mem_coalesce_width, clFFT_DataFormat dataFormat) +{ + int log2NumWorkItemsPerXForm = (int) log2(numWorkItemsPerXForm); + int groupSize = numWorkItemsPerXForm * numXFormsPerWG; + int i, j; + int lMemSize = 0; + + if(numXFormsPerWG > 1) + kernelString += string(" s = S & ") + num2str(numXFormsPerWG - 1) + string(";\n"); + + if(numWorkItemsPerXForm >= 
mem_coalesce_width) + { + if(numXFormsPerWG > 1) + { + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm-1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n"); + kernelString += string(" offset = mad24( mad24(groupId, ") + num2str(numXFormsPerWG) + string(", jj), ") + num2str(N) + string(", ii );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + for(i = 0; i < R0; i++) + formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat); + kernelString += string(" }\n"); + } + else + { + kernelString += string(" ii = lId;\n"); + kernelString += string(" jj = 0;\n"); + kernelString += string(" offset = mad24(groupId, ") + num2str(N) + string(", ii);\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + for(i = 0; i < R0; i++) + formattedLoad(kernelString, i, i*numWorkItemsPerXForm, dataFormat); + } + } + else if( N >= mem_coalesce_width ) + { + int numInnerIter = N / mem_coalesce_width; + int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width ); + + kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n"); + 
kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + kernelString += string(" offset = mad24( groupId, ") + num2str(numXFormsPerWG) + string(", jj);\n"); + kernelString += string(" offset = mad24( offset, ") + num2str(N) + string(", ii );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + kernelString += string(" if( jj < s ) {\n"); + for(j = 0; j < numInnerIter; j++ ) + formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat); + kernelString += string(" }\n"); + if(i != numOuterIter - 1) + kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n"); + } + kernelString += string("}\n "); + kernelString += string("else {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + for(j = 0; j < numInnerIter; j++ ) + formattedLoad(kernelString, i * numInnerIter + j, j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * N, dataFormat); + } + kernelString += string("}\n"); + + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii);\n"); + + for( i = 0; i < numOuterIter; i++ ) + { + for( j = 0; j < numInnerIter; j++ ) + { + kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / 
mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") + + num2str(i * numInnerIter + j) + string("].x;\n"); + } + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + { + for( j = 0; j < numInnerIter; j++ ) + { + kernelString += string(" lMemStore[") + num2str(j * mem_coalesce_width + i * ( groupSize / mem_coalesce_width ) * (N + numWorkItemsPerXForm )) + string("] = a[") + + num2str(i * numInnerIter + j) + string("].y;\n"); + } + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + else + { + kernelString += string(" offset = mad24( groupId, ") + num2str(N * numXFormsPerWG) + string(", lId );\n"); + if(dataFormat == clFFT_InterleavedComplexFormat) + { + kernelString += string(" in += offset;\n"); + kernelString += string(" out += offset;\n"); + } + else + { + kernelString += string(" in_real += offset;\n"); + kernelString += string(" in_imag += offset;\n"); + kernelString += string(" out_real += offset;\n"); + kernelString += string(" out_imag += offset;\n"); + } + + kernelString += string(" ii = lId & ") + num2str(N-1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(N)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for( i = 0; i < R0; i++ ) + { + kernelString += string(" if(jj < s 
)\n"); + formattedLoad(kernelString, i, i*groupSize, dataFormat); + if(i != R0 - 1) + kernelString += string(" jj += ") + num2str(groupSize / N) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for( i = 0; i < R0; i++ ) + { + formattedLoad(kernelString, i, i*groupSize, dataFormat); + } + kernelString += string("}\n"); + + if(numWorkItemsPerXForm > 1) + { + kernelString += string(" ii = lId & ") + num2str(numWorkItemsPerXForm - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str(log2NumWorkItemsPerXForm) + string(";\n"); + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + } + else + { + kernelString += string(" ii = 0;\n"); + kernelString += string(" jj = lId;\n"); + kernelString += string(" lMemLoad = sMem + mul24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(");\n"); + } + + + for( i = 0; i < R0; i++ ) + kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].x;\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" lMemStore[") + num2str(i * ( groupSize / N ) * ( N + numWorkItemsPerXForm )) + string("] = a[") + num2str(i) + string("].y;\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < R0; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i * numWorkItemsPerXForm) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + + return lMemSize; +} + +static int 
+insertGlobalStoresAndTranspose(string &kernelString, int N, int maxRadix, int Nr, int numWorkItemsPerXForm, int numXFormsPerWG, int mem_coalesce_width, clFFT_DataFormat dataFormat) +{ + int groupSize = numWorkItemsPerXForm * numXFormsPerWG; + int i, j, k, ind; + int lMemSize = 0; + int numIter = maxRadix / Nr; + string indent = string(""); + + if( numWorkItemsPerXForm >= mem_coalesce_width ) + { + if(numXFormsPerWG > 1) + { + kernelString += string(" if( !s || (groupId < get_num_groups(0)-1) || (jj < s) ) {\n"); + indent = string(" "); + } + for(i = 0; i < maxRadix; i++) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + formattedStore(kernelString, ind, i*numWorkItemsPerXForm, dataFormat); + } + if(numXFormsPerWG > 1) + kernelString += string(" }\n"); + } + else if( N >= mem_coalesce_width ) + { + int numInnerIter = N / mem_coalesce_width; + int numOuterIter = numXFormsPerWG / ( groupSize / mem_coalesce_width ); + + kernelString += string(" lMemLoad = sMem + mad24( jj, ") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + kernelString += string(" ii = lId & ") + num2str(mem_coalesce_width - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int)log2(mem_coalesce_width)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + for( j = 0; j < numInnerIter; j++ ) + kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].x = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n"); + kernelString += string(" barrier( 
CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < numOuterIter; i++ ) + for( j = 0; j < numInnerIter; j++ ) + kernelString += string(" a[") + num2str(i*numInnerIter + j) + string("].y = lMemStore[") + num2str(j*mem_coalesce_width + i*( groupSize / mem_coalesce_width )*(N + numWorkItemsPerXForm)) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + kernelString += string(" if( jj < s ) {\n"); + for(j = 0; j < numInnerIter; j++ ) + formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat); + kernelString += string(" }\n"); + if(i != numOuterIter - 1) + kernelString += string(" jj += ") + num2str(groupSize / mem_coalesce_width) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for(i = 0; i < numOuterIter; i++ ) + { + for(j = 0; j < numInnerIter; j++ ) + formattedStore(kernelString, i*numInnerIter + j, j*mem_coalesce_width + i*(groupSize/mem_coalesce_width)*N, dataFormat); + } + kernelString += string("}\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + else + { + kernelString += string(" lMemLoad = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + kernelString += string(" ii = lId & ") + num2str(N - 1) + string(";\n"); + kernelString += string(" jj = lId >> ") + num2str((int) log2(N)) + string(";\n"); + kernelString += string(" lMemStore = sMem + mad24( jj,") + num2str(N + numWorkItemsPerXForm) + string(", ii );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k 
= i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].x;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + kernelString += string(" a[") + num2str(i) + string("].x = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + { + j = i % numIter; + k = i / numIter; + ind = j * Nr + k; + kernelString += string(" lMemLoad[") + num2str(i*numWorkItemsPerXForm) + string("] = a[") + num2str(ind) + string("].y;\n"); + } + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + for( i = 0; i < maxRadix; i++ ) + kernelString += string(" a[") + num2str(i) + string("].y = lMemStore[") + num2str(i*( groupSize / N )*( N + numWorkItemsPerXForm )) + string("];\n"); + kernelString += string(" barrier( CLK_LOCAL_MEM_FENCE );\n"); + + kernelString += string("if((groupId == get_num_groups(0)-1) && s) {\n"); + for( i = 0; i < maxRadix; i++ ) + { + kernelString += string(" if(jj < s ) {\n"); + formattedStore(kernelString, i, i*groupSize, dataFormat); + kernelString += string(" }\n"); + if( i != maxRadix - 1) + kernelString += string(" jj +=") + num2str(groupSize / N) + string(";\n"); + } + kernelString += string("}\n"); + kernelString += string("else {\n"); + for( i = 0; i < maxRadix; i++ ) + { + formattedStore(kernelString, i, i*groupSize, dataFormat); + } + kernelString += string("}\n"); + + lMemSize = (N + numWorkItemsPerXForm) * numXFormsPerWG; + } + + return lMemSize; +} + +static void +insertfftKernel(string &kernelString, int Nr, int numIter) +{ + int i; + for(i = 0; i < numIter; i++) + { + kernelString += string(" fftKernel") + num2str(Nr) + string("(a+") + num2str(i*Nr) + string(", dir);\n"); + } +} + +static void +insertTwiddleKernel(string &kernelString, int Nr, int numIter, 
int Nprev, int len, int numWorkItemsPerXForm) +{ + int z, k; + int logNPrev = log2(Nprev); + + for(z = 0; z < numIter; z++) + { + if(z == 0) + { + if(Nprev > 1) + kernelString += string(" angf = (float) (ii >> ") + num2str(logNPrev) + string(");\n"); + else + kernelString += string(" angf = (float) ii;\n"); + } + else + { + if(Nprev > 1) + kernelString += string(" angf = (float) ((") + num2str(z*numWorkItemsPerXForm) + string(" + ii) >>") + num2str(logNPrev) + string(");\n"); + else + kernelString += string(" angf = (float) (") + num2str(z*numWorkItemsPerXForm) + string(" + ii);\n"); + } + + for(k = 1; k < Nr; k++) { + int ind = z*Nr + k; + //float fac = (float) (2.0 * M_PI * (double) k / (double) len); + kernelString += string(" ang = dir * ( 2.0f * M_PI * ") + num2str(k) + string(".0f / ") + num2str(len) + string(".0f )") + string(" * angf;\n"); + kernelString += string(" w = (float2)(native_cos(ang), native_sin(ang));\n"); + kernelString += string(" a[") + num2str(ind) + string("] = complexMul(a[") + num2str(ind) + string("], w);\n"); + } + } +} + +static int +getPadding(int numWorkItemsPerXForm, int Nprev, int numWorkItemsReq, int numXFormsPerWG, int Nr, int numBanks, int *offset, int *midPad) +{ + if((numWorkItemsPerXForm <= Nprev) || (Nprev >= numBanks)) + *offset = 0; + else { + int numRowsReq = ((numWorkItemsPerXForm < numBanks) ? 
numWorkItemsPerXForm : numBanks) / Nprev; + int numColsReq = 1; + if(numRowsReq > Nr) + numColsReq = numRowsReq / Nr; + numColsReq = Nprev * numColsReq; + *offset = numColsReq; + } + + if(numWorkItemsPerXForm >= numBanks || numXFormsPerWG == 1) + *midPad = 0; + else { + int bankNum = ( (numWorkItemsReq + *offset) * Nr ) & (numBanks - 1); + if( bankNum >= numWorkItemsPerXForm ) + *midPad = 0; + else + *midPad = numWorkItemsPerXForm - bankNum; + } + + int lMemSize = ( numWorkItemsReq + *offset) * Nr * numXFormsPerWG + *midPad * (numXFormsPerWG - 1); + return lMemSize; +} + + +static void +insertLocalStores(string &kernelString, int numIter, int Nr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp) +{ + int z, k; + + for(z = 0; z < numIter; z++) { + for(k = 0; k < Nr; k++) { + int index = k*(numWorkItemsReq + offset) + z*numWorkItemsPerXForm; + kernelString += string(" lMemStore[") + num2str(index) + string("] = a[") + num2str(z*Nr + k) + string("].") + comp + string(";\n"); + } + } + kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); +} + +static void +insertLocalLoads(string &kernelString, int n, int Nr, int Nrn, int Nprev, int Ncurr, int numWorkItemsPerXForm, int numWorkItemsReq, int offset, string &comp) +{ + int numWorkItemsReqN = n / Nrn; + int interBlockHNum = max( Nprev / numWorkItemsPerXForm, 1 ); + int interBlockHStride = numWorkItemsPerXForm; + int vertWidth = max(numWorkItemsPerXForm / Nprev, 1); + vertWidth = min( vertWidth, Nr); + int vertNum = Nr / vertWidth; + int vertStride = ( n / Nr + offset ) * vertWidth; + int iter = max( numWorkItemsReqN / numWorkItemsPerXForm, 1); + int intraBlockHStride = (numWorkItemsPerXForm / (Nprev*Nr)) > 1 ? 
(numWorkItemsPerXForm / (Nprev*Nr)) : 1; + intraBlockHStride *= Nprev; + + int stride = numWorkItemsReq / Nrn; + int i; + for(i = 0; i < iter; i++) { + int ii = i / (interBlockHNum * vertNum); + int zz = i % (interBlockHNum * vertNum); + int jj = zz % interBlockHNum; + int kk = zz / interBlockHNum; + int z; + for(z = 0; z < Nrn; z++) { + int st = kk * vertStride + jj * interBlockHStride + ii * intraBlockHStride + z * stride; + kernelString += string(" a[") + num2str(i*Nrn + z) + string("].") + comp + string(" = lMemLoad[") + num2str(st) + string("];\n"); + } + } + kernelString += string(" barrier(CLK_LOCAL_MEM_FENCE);\n"); +} + +static void +insertLocalLoadIndexArithmatic(string &kernelString, int Nprev, int Nr, int numWorkItemsReq, int numWorkItemsPerXForm, int numXFormsPerWG, int offset, int midPad) +{ + int Ncurr = Nprev * Nr; + int logNcurr = log2(Ncurr); + int logNprev = log2(Nprev); + int incr = (numWorkItemsReq + offset) * Nr + midPad; + + if(Ncurr < numWorkItemsPerXForm) + { + if(Nprev == 1) + kernelString += string(" j = ii & ") + num2str(Ncurr - 1) + string(";\n"); + else + kernelString += string(" j = (ii & ") + num2str(Ncurr - 1) + string(") >> ") + num2str(logNprev) + string(";\n"); + + if(Nprev == 1) + kernelString += string(" i = ii >> ") + num2str(logNcurr) + string(";\n"); + else + kernelString += string(" i = mad24(ii >> ") + num2str(logNcurr) + string(", ") + num2str(Nprev) + string(", ii & ") + num2str(Nprev - 1) + string(");\n"); + } + else + { + if(Nprev == 1) + kernelString += string(" j = ii;\n"); + else + kernelString += string(" j = ii >> ") + num2str(logNprev) + string(";\n"); + if(Nprev == 1) + kernelString += string(" i = 0;\n"); + else + kernelString += string(" i = ii & ") + num2str(Nprev - 1) + string(";\n"); + } + + if(numXFormsPerWG > 1) + kernelString += string(" i = mad24(jj, ") + num2str(incr) + string(", i);\n"); + + kernelString += string(" lMemLoad = sMem + mad24(j, ") + num2str(numWorkItemsReq + offset) + string(", i);\n"); 
+} + +static void +insertLocalStoreIndexArithmatic(string &kernelString, int numWorkItemsReq, int numXFormsPerWG, int Nr, int offset, int midPad) +{ + if(numXFormsPerWG == 1) { + kernelString += string(" lMemStore = sMem + ii;\n"); + } + else { + kernelString += string(" lMemStore = sMem + mad24(jj, ") + num2str((numWorkItemsReq + offset)*Nr + midPad) + string(", ii);\n"); + } +} + + +static void +createLocalMemfftKernelString(cl_fft_plan *plan) +{ + unsigned int radixArray[10]; + unsigned int numRadix; + + unsigned int n = plan->n.x; + + assert(n <= plan->max_work_item_per_workgroup * plan->max_radix && "signal lenght too big for local mem fft\n"); + + getRadixArray(n, radixArray, &numRadix, 0); + assert(numRadix > 0 && "no radix array supplied\n"); + + if(n/radixArray[0] > plan->max_work_item_per_workgroup) + getRadixArray(n, radixArray, &numRadix, plan->max_radix); + + assert(radixArray[0] <= plan->max_radix && "max radix choosen is greater than allowed\n"); + assert(n/radixArray[0] <= plan->max_work_item_per_workgroup && "required work items per xform greater than maximum work items allowed per work group for local mem fft\n"); + + unsigned int tmpLen = 1; + unsigned int i; + for(i = 0; i < numRadix; i++) + { + assert( radixArray[i] && !( (radixArray[i] - 1) & radixArray[i] ) ); + tmpLen *= radixArray[i]; + } + assert(tmpLen == n && "product of radices choosen doesnt match the length of signal\n"); + + int offset, midPad; + string localString(""), kernelName(""); + + clFFT_DataFormat dataFormat = plan->format; + string *kernelString = plan->kernel_string; + + + cl_fft_kernel_info **kInfo = &plan->kernel_info; + int kCount = 0; + + while(*kInfo) + { + kInfo = &(*kInfo)->next; + kCount++; + } + + kernelName = string("fft") + num2str(kCount); + + *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info)); + (*kInfo)->kernel = 0; + (*kInfo)->lmem_size = 0; + (*kInfo)->num_workgroups = 0; + (*kInfo)->num_workitems_per_workgroup = 0; + (*kInfo)->dir = 
cl_fft_kernel_x; + (*kInfo)->in_place_possible = 1; + (*kInfo)->next = NULL; + (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1)); + strcpy((*kInfo)->kernel_name, kernelName.c_str()); + + unsigned int numWorkItemsPerXForm = n / radixArray[0]; + unsigned int numWorkItemsPerWG = numWorkItemsPerXForm <= 64 ? 64 : numWorkItemsPerXForm; + assert(numWorkItemsPerWG <= plan->max_work_item_per_workgroup); + int numXFormsPerWG = numWorkItemsPerWG / numWorkItemsPerXForm; + (*kInfo)->num_workgroups = 1; + (*kInfo)->num_xforms_per_workgroup = numXFormsPerWG; + (*kInfo)->num_workitems_per_workgroup = numWorkItemsPerWG; + + unsigned int *N = radixArray; + unsigned int maxRadix = N[0]; + unsigned int lMemSize = 0; + + insertVariables(localString, maxRadix); + + lMemSize = insertGlobalLoadsAndTranspose(localString, n, numWorkItemsPerXForm, numXFormsPerWG, maxRadix, plan->min_mem_coalesce_width, dataFormat); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size; + + string xcomp = string("x"); + string ycomp = string("y"); + + unsigned int Nprev = 1; + unsigned int len = n; + unsigned int r; + for(r = 0; r < numRadix; r++) + { + int numIter = N[0] / N[r]; + int numWorkItemsReq = n / N[r]; + int Ncurr = Nprev * N[r]; + insertfftKernel(localString, N[r], numIter); + + if(r < (numRadix - 1)) { + insertTwiddleKernel(localString, N[r], numIter, Nprev, len, numWorkItemsPerXForm); + lMemSize = getPadding(numWorkItemsPerXForm, Nprev, numWorkItemsReq, numXFormsPerWG, N[r], plan->num_local_mem_banks, &offset, &midPad); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? 
lMemSize : (*kInfo)->lmem_size; + insertLocalStoreIndexArithmatic(localString, numWorkItemsReq, numXFormsPerWG, N[r], offset, midPad); + insertLocalLoadIndexArithmatic(localString, Nprev, N[r], numWorkItemsReq, numWorkItemsPerXForm, numXFormsPerWG, offset, midPad); + insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp); + insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, xcomp); + insertLocalStores(localString, numIter, N[r], numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp); + insertLocalLoads(localString, n, N[r], N[r+1], Nprev, Ncurr, numWorkItemsPerXForm, numWorkItemsReq, offset, ycomp); + Nprev = Ncurr; + len = len / N[r]; + } + } + + lMemSize = insertGlobalStoresAndTranspose(localString, n, maxRadix, N[numRadix - 1], numWorkItemsPerXForm, numXFormsPerWG, plan->min_mem_coalesce_width, dataFormat); + (*kInfo)->lmem_size = (lMemSize > (*kInfo)->lmem_size) ? lMemSize : (*kInfo)->lmem_size; + + insertHeader(*kernelString, kernelName, dataFormat); + *kernelString += string("{\n"); + if((*kInfo)->lmem_size) + *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n"); + *kernelString += localString; + *kernelString += string("}\n"); +} + +// For n larger than what can be computed using local memory fft, global transposes +// multiple kernel launces is needed. For these sizes, n can be decomposed using +// much larger base radices i.e. say n = 262144 = 128 x 64 x 32. Thus three kernel +// launches will be needed, first computing 64 x 32, length 128 ffts, second computing +// 128 x 32 length 64 ffts, and finally a kernel computing 128 x 64 length 32 ffts. 
+// Each of these base radices can further be divided into factors so that each of these +// base ffts can be computed within one kernel launch using in-register ffts and local +// memory transposes i.e. for the first kernel above which computes 64 x 32 ffts of length +// 128, 128 can be decomposed into 128 = 16 x 8 i.e. 8 work items can compute 8 length +// 16 ffts followed by transpose using local memory followed by each of these eight +// work items computing 2 length 8 ffts thus computing 16 length 8 ffts in total. This +// means only 8 work items are needed for computing one length 128 fft. If we choose +// work group size of say 64, we can compute 64/8 = 8 length 128 ffts within one +// work group. Since we need to compute 64 x 32 length 128 ffts in first kernel, this +// means we need to launch 64 x 32 / 8 = 256 work groups with 64 work items in each +// work group where each work group is computing 8 length 128 ffts where each length +// 128 fft is computed by 8 work items. Same logic can be applied to other two kernels +// in this example. Users can play with different base radices and different +// decompositions of base radices to generate different kernels and see which gives +// best performance. 
Following function is just fixed to use 128 as base radix + +void +getGlobalRadixInfo(int n, int *radix, int *R1, int *R2, int *numRadices) +{ + int baseRadix = min(n, 128); + + int numR = 0; + int N = n; + while(N > baseRadix) + { + N /= baseRadix; + numR++; + } + + for(int i = 0; i < numR; i++) + radix[i] = baseRadix; + + radix[numR] = N; + numR++; + *numRadices = numR; + + for(int i = 0; i < numR; i++) + { + int B = radix[i]; + if(B <= 8) + { + R1[i] = B; + R2[i] = 1; + continue; + } + + int r1 = 2; + int r2 = B / r1; + while(r2 > r1) + { + r1 *=2; + r2 = B / r1; + } + R1[i] = r1; + R2[i] = r2; + } +} + +static void +createGlobalFFTKernelString(cl_fft_plan *plan, int n, int BS, cl_fft_kernel_dir dir, int vertBS) +{ + int i, j, k, t; + int radixArr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int R1Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int R2Arr[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + int radix, R1, R2; + int numRadices; + + int maxThreadsPerBlock = plan->max_work_item_per_workgroup; + int maxArrayLen = plan->max_radix; + int batchSize = plan->min_mem_coalesce_width; + clFFT_DataFormat dataFormat = plan->format; + int vertical = (dir == cl_fft_kernel_x) ? 0 : 1; + + getGlobalRadixInfo(n, radixArr, R1Arr, R2Arr, &numRadices); + + int numPasses = numRadices; + + string localString(""), kernelName(""); + string *kernelString = plan->kernel_string; + cl_fft_kernel_info **kInfo = &plan->kernel_info; + int kCount = 0; + + while(*kInfo) + { + kInfo = &(*kInfo)->next; + kCount++; + } + + int N = n; + int m = (int)log2(n); + int Rinit = vertical ? BS : 1; + batchSize = vertical ? 
min(BS, batchSize) : batchSize; + int passNum; + + for(passNum = 0; passNum < numPasses; passNum++) + { + + localString.clear(); + kernelName.clear(); + + radix = radixArr[passNum]; + R1 = R1Arr[passNum]; + R2 = R2Arr[passNum]; + + int strideI = Rinit; + for(i = 0; i < numPasses; i++) + if(i != passNum) + strideI *= radixArr[i]; + + int strideO = Rinit; + for(i = 0; i < passNum; i++) + strideO *= radixArr[i]; + + int threadsPerXForm = R2; + batchSize = R2 == 1 ? plan->max_work_item_per_workgroup : batchSize; + batchSize = min(batchSize, strideI); + int threadsPerBlock = batchSize * threadsPerXForm; + threadsPerBlock = min(threadsPerBlock, maxThreadsPerBlock); + batchSize = threadsPerBlock / threadsPerXForm; + assert(R2 <= R1); + assert(R1*R2 == radix); + assert(R1 <= maxArrayLen); + assert(threadsPerBlock <= maxThreadsPerBlock); + + int numIter = R1 / R2; + int gInInc = threadsPerBlock / batchSize; + + + int lgStrideO = log2(strideO); + int numBlocksPerXForm = strideI / batchSize; + int numBlocks = numBlocksPerXForm; + if(!vertical) + numBlocks *= BS; + else + numBlocks *= vertBS; + + kernelName = string("fft") + num2str(kCount); + *kInfo = (cl_fft_kernel_info *) malloc(sizeof(cl_fft_kernel_info)); + (*kInfo)->kernel = 0; + if(R2 == 1) + (*kInfo)->lmem_size = 0; + else + { + if(strideO == 1) + (*kInfo)->lmem_size = (radix + 1)*batchSize; + else + (*kInfo)->lmem_size = threadsPerBlock*R1; + } + (*kInfo)->num_workgroups = numBlocks; + (*kInfo)->num_xforms_per_workgroup = 1; + (*kInfo)->num_workitems_per_workgroup = threadsPerBlock; + (*kInfo)->dir = dir; + if( (passNum == (numPasses - 1)) && (numPasses & 1) ) + (*kInfo)->in_place_possible = 1; + else + (*kInfo)->in_place_possible = 0; + (*kInfo)->next = NULL; + (*kInfo)->kernel_name = (char *) malloc(sizeof(char)*(kernelName.size()+1)); + strcpy((*kInfo)->kernel_name, kernelName.c_str()); + + insertVariables(localString, R1); + + if(vertical) + { + localString += string("xNum = groupId >> ") + 
num2str((int)log2(numBlocksPerXForm)) + string(";\n"); + localString += string("groupId = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n"); + localString += string("indexIn = mad24(groupId, ") + num2str(batchSize) + string(", xNum << ") + num2str((int)log2(n*BS)) + string(");\n"); + localString += string("tid = mul24(groupId, ") + num2str(batchSize) + string(");\n"); + localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n"); + int stride = radix*Rinit; + for(i = 0; i < passNum; i++) + stride *= radixArr[i]; + localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j + ") + string("(xNum << ") + num2str((int) log2(n*BS)) + string("));\n"); + localString += string("bNum = groupId;\n"); + } + else + { + int lgNumBlocksPerXForm = log2(numBlocksPerXForm); + localString += string("bNum = groupId & ") + num2str(numBlocksPerXForm - 1) + string(";\n"); + localString += string("xNum = groupId >> ") + num2str(lgNumBlocksPerXForm) + string(";\n"); + localString += string("indexIn = mul24(bNum, ") + num2str(batchSize) + string(");\n"); + localString += string("tid = indexIn;\n"); + localString += string("i = tid >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("j = tid & ") + num2str(strideO - 1) + string(";\n"); + int stride = radix*Rinit; + for(i = 0; i < passNum; i++) + stride *= radixArr[i]; + localString += string("indexOut = mad24(i, ") + num2str(stride) + string(", j);\n"); + localString += string("indexIn += (xNum << ") + num2str(m) + string(");\n"); + localString += string("indexOut += (xNum << ") + num2str(m) + string(");\n"); + } + + // Load Data + int lgBatchSize = log2(batchSize); + localString += string("tid = lId;\n"); + localString += string("i = tid & ") + num2str(batchSize - 1) + string(";\n"); + localString += string("j = tid >> ") + num2str(lgBatchSize) + string(";\n"); + localString += string("indexIn += 
mad24(j, ") + num2str(strideI) + string(", i);\n"); + + if(dataFormat == clFFT_SplitComplexFormat) + { + localString += string("in_real += indexIn;\n"); + localString += string("in_imag += indexIn;\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("].x = in_real[") + num2str(j*gInInc*strideI) + string("];\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("].y = in_imag[") + num2str(j*gInInc*strideI) + string("];\n"); + } + else + { + localString += string("in += indexIn;\n"); + for(j = 0; j < R1; j++) + localString += string("a[") + num2str(j) + string("] = in[") + num2str(j*gInInc*strideI) + string("];\n"); + } + + localString += string("fftKernel") + num2str(R1) + string("(a, dir);\n"); + + if(R2 > 1) + { + // twiddle + for(k = 1; k < R1; k++) + { + localString += string("ang = dir*(2.0f*M_PI*") + num2str(k) + string("/") + num2str(radix) + string(")*j;\n"); + localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n"); + localString += string("a[") + num2str(k) + string("] = complexMul(a[") + num2str(k) + string("], w);\n"); + } + + // shuffle + numIter = R1 / R2; + localString += string("indexIn = mad24(j, ") + num2str(threadsPerBlock*numIter) + string(", i);\n"); + localString += string("lMemStore = sMem + tid;\n"); + localString += string("lMemLoad = sMem + indexIn;\n"); + for(k = 0; k < R1; k++) + localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < numIter; k++) + for(t = 0; t < R2; t++) + localString += string("a[") + num2str(k*R2+t) + string("].x = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < R1; k++) + localString += string("lMemStore[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n"); + localString += 
string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + for(k = 0; k < numIter; k++) + for(t = 0; t < R2; t++) + localString += string("a[") + num2str(k*R2+t) + string("].y = lMemLoad[") + num2str(t*batchSize + k*threadsPerBlock) + string("];\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + for(j = 0; j < numIter; j++) + localString += string("fftKernel") + num2str(R2) + string("(a + ") + num2str(j*R2) + string(", dir);\n"); + } + + // twiddle + if(passNum < (numPasses - 1)) + { + localString += string("l = ((bNum << ") + num2str(lgBatchSize) + string(") + i) >> ") + num2str(lgStrideO) + string(";\n"); + localString += string("k = j << ") + num2str((int)log2(R1/R2)) + string(";\n"); + localString += string("ang1 = dir*(2.0f*M_PI/") + num2str(N) + string(")*l;\n"); + for(t = 0; t < R1; t++) + { + localString += string("ang = ang1*(k + ") + num2str((t%R2)*R1 + (t/R2)) + string(");\n"); + localString += string("w = (float2)(native_cos(ang), native_sin(ang));\n"); + localString += string("a[") + num2str(t) + string("] = complexMul(a[") + num2str(t) + string("], w);\n"); + } + } + + // Store Data + if(strideO == 1) + { + + localString += string("lMemStore = sMem + mad24(i, ") + num2str(radix + 1) + string(", j << ") + num2str((int)log2(R1/R2)) + string(");\n"); + localString += string("lMemLoad = sMem + mad24(tid >> ") + num2str((int)log2(radix)) + string(", ") + num2str(radix+1) + string(", tid & ") + num2str(radix-1) + string(");\n"); + + for(int i = 0; i < R1/R2; i++) + for(int j = 0; j < R2; j++) + localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].x;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + if(threadsPerBlock >= radix) + { + for(int i = 0; i < R1; i++) + localString += string("a[") + num2str(i) + string("].x = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n"); + } + else + { + int innerIter = radix/threadsPerBlock; + int outerIter = R1/innerIter; + 
for(int i = 0; i < outerIter; i++) + for(int j = 0; j < innerIter; j++) + localString += string("a[") + num2str(i*innerIter+j) + string("].x = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n"); + } + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + for(int i = 0; i < R1/R2; i++) + for(int j = 0; j < R2; j++) + localString += string("lMemStore[ ") + num2str(i + j*R1) + string("] = a[") + num2str(i*R2+j) + string("].y;\n"); + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + if(threadsPerBlock >= radix) + { + for(int i = 0; i < R1; i++) + localString += string("a[") + num2str(i) + string("].y = lMemLoad[") + num2str(i*(radix+1)*(threadsPerBlock/radix)) + string("];\n"); + } + else + { + int innerIter = radix/threadsPerBlock; + int outerIter = R1/innerIter; + for(int i = 0; i < outerIter; i++) + for(int j = 0; j < innerIter; j++) + localString += string("a[") + num2str(i*innerIter+j) + string("].y = lMemLoad[") + num2str(j*threadsPerBlock + i*(radix+1)) + string("];\n"); + } + localString += string("barrier(CLK_LOCAL_MEM_FENCE);\n"); + + localString += string("indexOut += tid;\n"); + if(dataFormat == clFFT_SplitComplexFormat) { + localString += string("out_real += indexOut;\n"); + localString += string("out_imag += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out_real[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].x;\n"); + for(k = 0; k < R1; k++) + localString += string("out_imag[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("].y;\n"); + } + else { + localString += string("out += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out[") + num2str(k*threadsPerBlock) + string("] = a[") + num2str(k) + string("];\n"); + } + + } + else + { + localString += string("indexOut += mad24(j, ") + num2str(numIter*strideO) + string(", i);\n"); + if(dataFormat == clFFT_SplitComplexFormat) { + localString += string("out_real += indexOut;\n"); + 
localString += string("out_imag += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out_real[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].x;\n"); + for(k = 0; k < R1; k++) + localString += string("out_imag[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("].y;\n"); + } + else { + localString += string("out += indexOut;\n"); + for(k = 0; k < R1; k++) + localString += string("out[") + num2str(((k%R2)*R1 + (k/R2))*strideO) + string("] = a[") + num2str(k) + string("];\n"); + } + } + + insertHeader(*kernelString, kernelName, dataFormat); + *kernelString += string("{\n"); + if((*kInfo)->lmem_size) + *kernelString += string(" __local float sMem[") + num2str((*kInfo)->lmem_size) + string("];\n"); + *kernelString += localString; + *kernelString += string("}\n"); + + N /= radix; + kInfo = &(*kInfo)->next; + kCount++; + } +} + +void FFT1D(cl_fft_plan *plan, cl_fft_kernel_dir dir) +{ + unsigned int radixArray[10]; + unsigned int numRadix; + + switch(dir) + { + case cl_fft_kernel_x: + if(plan->n.x > plan->max_localmem_fft_size) + { + createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1); + } + else if(plan->n.x > 1) + { + getRadixArray(plan->n.x, radixArray, &numRadix, 0); + if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup) + { + createLocalMemfftKernelString(plan); + } + else + { + getRadixArray(plan->n.x, radixArray, &numRadix, plan->max_radix); + if(plan->n.x / radixArray[0] <= plan->max_work_item_per_workgroup) + createLocalMemfftKernelString(plan); + else + createGlobalFFTKernelString(plan, plan->n.x, 1, cl_fft_kernel_x, 1); + } + } + break; + + case cl_fft_kernel_y: + if(plan->n.y > 1) + createGlobalFFTKernelString(plan, plan->n.y, plan->n.x, cl_fft_kernel_y, 1); + break; + + case cl_fft_kernel_z: + if(plan->n.z > 1) + createGlobalFFTKernelString(plan, plan->n.z, plan->n.x*plan->n.y, cl_fft_kernel_z, 1); + default: + return; + } +} + diff --git 
a/RTCP/GPUProc/OpenCL_FFT/src/fft_setup.cpp b/RTCP/GPUProc/OpenCL_FFT/src/fft_setup.cpp new file mode 100644 index 0000000000000000000000000000000000000000..98564838f6849f47625c3c2e8dd381d6b1888e56 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/fft_setup.cpp @@ -0,0 +1,402 @@ + +// +// File: fft_setup.cpp +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. 
APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. +// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#include "fft_internal.h" +#include "fft_base_kernels.h" +#include <climits> +#include <cstdlib> +#include <cstring> +#include <sys/types.h> +#include <sys/stat.h> +#include <iostream> +#include <string> +#include <sstream> + +using namespace std; + +extern void getKernelWorkDimensions(cl_fft_plan *plan, cl_fft_kernel_info *kernelInfo, cl_int *batchSize, size_t *gWorkItems, size_t *lWorkItems); + +static void +getBlockConfigAndKernelString(cl_fft_plan *plan) +{ + plan->temp_buffer_needed = 0; + *plan->kernel_string += baseKernels; + + if(plan->format == clFFT_SplitComplexFormat) + *plan->kernel_string += twistKernelPlannar; + else + *plan->kernel_string += twistKernelInterleaved; + + switch(plan->dim) + { + case clFFT_1D: + FFT1D(plan, cl_fft_kernel_x); + break; + + case clFFT_2D: + FFT1D(plan, cl_fft_kernel_x); + FFT1D(plan, cl_fft_kernel_y); + break; + + case clFFT_3D: + FFT1D(plan, cl_fft_kernel_x); + FFT1D(plan, cl_fft_kernel_y); + FFT1D(plan, cl_fft_kernel_z); + 
break; + + default: + return; + } + + plan->temp_buffer_needed = 0; + cl_fft_kernel_info *kInfo = plan->kernel_info; + while(kInfo) + { + plan->temp_buffer_needed |= !kInfo->in_place_possible; + kInfo = kInfo->next; + } +} + + +static void +deleteKernelInfo(cl_fft_kernel_info *kInfo) +{ + if(kInfo) + { + if(kInfo->kernel_name) + free(kInfo->kernel_name); + if(kInfo->kernel) + clReleaseKernel(kInfo->kernel); + free(kInfo); + } +} + +static void +destroy_plan(cl_fft_plan *Plan) +{ + cl_fft_kernel_info *kernel_info = Plan->kernel_info; + + while(kernel_info) + { + cl_fft_kernel_info *tmp = kernel_info->next; + deleteKernelInfo(kernel_info); + kernel_info = tmp; + } + + Plan->kernel_info = NULL; + + if(Plan->kernel_string) + { + delete Plan->kernel_string; + Plan->kernel_string = NULL; + } + if(Plan->twist_kernel) + { + clReleaseKernel(Plan->twist_kernel); + Plan->twist_kernel = NULL; + } + if(Plan->program) + { + clReleaseProgram(Plan->program); + Plan->program = NULL; + } + if(Plan->tempmemobj) + { + clReleaseMemObject(Plan->tempmemobj); + Plan->tempmemobj = NULL; + } + if(Plan->tempmemobj_real) + { + clReleaseMemObject(Plan->tempmemobj_real); + Plan->tempmemobj_real = NULL; + } + if(Plan->tempmemobj_imag) + { + clReleaseMemObject(Plan->tempmemobj_imag); + Plan->tempmemobj_imag = NULL; + } +} + +static int +createKernelList(cl_fft_plan *plan) +{ + cl_program program = plan->program; + cl_fft_kernel_info *kernel_info = plan->kernel_info; + + cl_int err; + while(kernel_info) + { + kernel_info->kernel = clCreateKernel(program, kernel_info->kernel_name, &err); + if(!kernel_info->kernel || err != CL_SUCCESS) + return err; + kernel_info = kernel_info->next; + } + + if(plan->format == clFFT_SplitComplexFormat) + plan->twist_kernel = clCreateKernel(program, "clFFT_1DTwistSplit", &err); + else + plan->twist_kernel = clCreateKernel(program, "clFFT_1DTwistInterleaved", &err); + + if(!plan->twist_kernel || err) + return err; + + return CL_SUCCESS; +} + +int 
getMaxKernelWorkGroupSize(cl_fft_plan *plan, unsigned int *max_wg_size, unsigned int num_devices, cl_device_id *devices) +{ + int reg_needed = 0; + *max_wg_size = INT_MAX; + int err; + size_t wg_size; + + unsigned int i; + for(i = 0; i < num_devices; i++) + { + cl_fft_kernel_info *kInfo = plan->kernel_info; + while(kInfo) + { + err = clGetKernelWorkGroupInfo(kInfo->kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &wg_size, NULL); + if(err != CL_SUCCESS) + return -1; + + if(wg_size < kInfo->num_workitems_per_workgroup) + reg_needed |= 1; + + if(*max_wg_size > wg_size) + *max_wg_size = wg_size; + + kInfo = kInfo->next; + } + } + + return reg_needed; +} + +#define ERR_MACRO(err) { \ + if( err != CL_SUCCESS) \ + { \ + if(error_code) \ + *error_code = err; \ + clFFT_DestroyPlan((clFFT_Plan) plan); \ + return (clFFT_Plan) NULL; \ + } \ + } + +clFFT_Plan +clFFT_CreatePlan(cl_context context, clFFT_Dim3 n, clFFT_Dimension dim, clFFT_DataFormat dataFormat, cl_int *error_code ) +{ + int i; + cl_int err; + int isPow2 = 1; + cl_fft_plan *plan = NULL; + ostringstream kString; + int num_devices; + int gpu_found = 0; + cl_device_id devices[16]; + size_t ret_size; + cl_device_type device_type; + + if(!context) + ERR_MACRO(CL_INVALID_VALUE); + + isPow2 |= n.x && !( (n.x - 1) & n.x ); + isPow2 |= n.y && !( (n.y - 1) & n.y ); + isPow2 |= n.z && !( (n.z - 1) & n.z ); + + if(!isPow2) + ERR_MACRO(CL_INVALID_VALUE); + + if( (dim == clFFT_1D && (n.y != 1 || n.z != 1)) || (dim == clFFT_2D && n.z != 1) ) + ERR_MACRO(CL_INVALID_VALUE); + + plan = (cl_fft_plan *) malloc(sizeof(cl_fft_plan)); + if(!plan) + ERR_MACRO(CL_OUT_OF_RESOURCES); + + plan->context = context; + clRetainContext(context); + plan->n = n; + plan->dim = dim; + plan->format = dataFormat; + plan->kernel_info = 0; + plan->num_kernels = 0; + plan->twist_kernel = 0; + plan->program = 0; + plan->temp_buffer_needed = 0; + plan->last_batch_size = 0; + plan->tempmemobj = 0; + plan->tempmemobj_real = 0; + 
plan->tempmemobj_imag = 0; + plan->max_localmem_fft_size = 2048; + plan->max_work_item_per_workgroup = 256; + plan->max_radix = 16; + plan->min_mem_coalesce_width = 16; + plan->num_local_mem_banks = 16; + +patch_kernel_source: + + plan->kernel_string = new string(""); + if(!plan->kernel_string) + ERR_MACRO(CL_OUT_OF_RESOURCES); + + getBlockConfigAndKernelString(plan); + + const char *source_str = plan->kernel_string->c_str(); + plan->program = clCreateProgramWithSource(context, 1, (const char**) &source_str, NULL, &err); + ERR_MACRO(err); + + err = clGetContextInfo(context, CL_CONTEXT_DEVICES, sizeof(devices), devices, &ret_size); + ERR_MACRO(err); + + num_devices = ret_size / sizeof(cl_device_id); + + for(i = 0; i < num_devices; i++) + { + err = clGetDeviceInfo(devices[i], CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL); + ERR_MACRO(err); + + if(device_type == CL_DEVICE_TYPE_CPU || device_type == CL_DEVICE_TYPE_GPU) + { + gpu_found = 1; + err = clBuildProgram(plan->program, 1, &devices[i], "-cl-mad-enable", NULL, NULL); + if (err != CL_SUCCESS) + { + char *build_log; + char devicename[200]; + size_t log_size; + + err = clGetProgramBuildInfo(plan->program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); + ERR_MACRO(err); + + build_log = (char *) malloc(log_size + 1); + + err = clGetProgramBuildInfo(plan->program, devices[i], CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL); + ERR_MACRO(err); + + err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(devicename), devicename, NULL); + ERR_MACRO(err); + + fprintf(stdout, "FFT program build log on device %s\n", devicename); + fprintf(stdout, "%s\n", build_log); + free(build_log); + + ERR_MACRO(err); + } + } + } + + if(!gpu_found) + ERR_MACRO(CL_INVALID_CONTEXT); + + err = createKernelList(plan); + ERR_MACRO(err); + + // we created program and kernels based on "some max work group size (default 256)" ... this work group size + // may be larger than what kernel may execute with ... 
if thats the case we need to regenerate the kernel source + // setting this as limit i.e max group size and rebuild. + unsigned int max_kernel_wg_size; + int patching_req = getMaxKernelWorkGroupSize(plan, &max_kernel_wg_size, num_devices, devices); + if(patching_req == -1) + { + ERR_MACRO(err); + } + + if(patching_req) + { + destroy_plan(plan); + plan->max_work_item_per_workgroup = max_kernel_wg_size; + goto patch_kernel_source; + } + + cl_fft_kernel_info *kInfo = plan->kernel_info; + while(kInfo) + { + plan->num_kernels++; + kInfo = kInfo->next; + } + + if(error_code) + *error_code = CL_SUCCESS; + + return (clFFT_Plan) plan; +} + +void +clFFT_DestroyPlan(clFFT_Plan plan) +{ + cl_fft_plan *Plan = (cl_fft_plan *) plan; + if(Plan) + { + destroy_plan(Plan); + clReleaseContext(Plan->context); + free(Plan); + } +} + +void clFFT_DumpPlan( clFFT_Plan Plan, FILE *file) +{ + size_t gDim, lDim; + FILE *out; + if(!file) + out = stdout; + else + out = file; + + cl_fft_plan *plan = (cl_fft_plan *) Plan; + cl_fft_kernel_info *kInfo = plan->kernel_info; + + while(kInfo) + { + cl_int s = 1; + getKernelWorkDimensions(plan, kInfo, &s, &gDim, &lDim); + fprintf(out, "Run kernel %s with global dim = {%zd*BatchSize}, local dim={%zd}\n", kInfo->kernel_name, gDim, lDim); + kInfo = kInfo->next; + } + fprintf(out, "%s\n", plan->kernel_string->c_str()); +} diff --git a/RTCP/GPUProc/OpenCL_FFT/src/libOpenCL_FFT.a.not b/RTCP/GPUProc/OpenCL_FFT/src/libOpenCL_FFT.a.not new file mode 100644 index 0000000000000000000000000000000000000000..595b15d060a75485d1668d9fd6271c350eec7afb Binary files /dev/null and b/RTCP/GPUProc/OpenCL_FFT/src/libOpenCL_FFT.a.not differ diff --git a/RTCP/GPUProc/OpenCL_FFT/src/main.cpp b/RTCP/GPUProc/OpenCL_FFT/src/main.cpp new file mode 100755 index 0000000000000000000000000000000000000000..bc6b956137faedcb2a58c25e1d81d3aedd8a4568 --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/main.cpp @@ -0,0 +1,882 @@ + +// +// File: main.cpp +// +// Version: <1.0> +// +// Disclaimer: 
IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +#include <string.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <CL/cl.h> +#include "clFFT.h" +//#include <mach/mach_time.h> +#include <Accelerate/Accelerate.h> +#include "procs.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <stdint.h> +#include <float.h> + +#define eps_avg 10.0 + +#define MAX( _a, _b) ((_a)>(_b)?(_a) : (_b)) + +typedef enum { + clFFT_OUT_OF_PLACE, + clFFT_IN_PLACE, +}clFFT_TestType; + +typedef struct +{ + double real; + double imag; +}clFFT_ComplexDouble; + +typedef struct +{ + double *real; + double *imag; +}clFFT_SplitComplexDouble; + +cl_device_id device_id; +cl_context context; +cl_command_queue queue; + +typedef unsigned long long ulong; + +double subtractTimes( uint64_t endTime, uint64_t startTime ) +{ + uint64_t difference = endTime - startTime; + static double conversion = 0.0; + + if( conversion == 0.0 ) + { + mach_timebase_info_data_t info; + kern_return_t err = mach_timebase_info( &info ); + + //Convert the timebase into seconds + if( err == 0 ) + conversion = 1e-9 * (double) info.numer / (double) info.denom; + } + + return conversion * (double) difference; +} + +void computeReferenceF(clFFT_SplitComplex *out, clFFT_Dim3 n, + unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction 
dir) +{ + FFTSetup plan_vdsp; + DSPSplitComplex out_vdsp; + FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE; + + unsigned int i, j, k; + unsigned int stride; + unsigned int log2Nx = (unsigned int) log2(n.x); + unsigned int log2Ny = (unsigned int) log2(n.y); + unsigned int log2Nz = (unsigned int) log2(n.z); + unsigned int log2N; + + log2N = log2Nx; + log2N = log2N > log2Ny ? log2N : log2Ny; + log2N = log2N > log2Nz ? log2N : log2Nz; + + plan_vdsp = vDSP_create_fftsetup(log2N, 2); + + switch(dim) + { + case clFFT_1D: + + for(i = 0; i < batchSize; i++) + { + stride = i * n.x; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } + break; + + case clFFT_2D: + + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.y; j++) + { + stride = j * n.x + i * n.x * n.y; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.x; j++) + { + stride = j + i * n.x * n.y; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); + } + } + break; + + case clFFT_3D: + + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.z; j++) + { + for(k = 0; k < n.y; k++) + { + stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } + } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.z; j++) + { + for(k = 0; k < n.x; k++) + { + stride = k + j * n.x * n.y + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); + } + } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.y; j++) + { + for(k = 0; 
k < n.x; k++) + { + stride = k + j * n.x + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zip(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp); + } + } + } + break; + } + + vDSP_destroy_fftsetup(plan_vdsp); +} + +void computeReferenceD(clFFT_SplitComplexDouble *out, clFFT_Dim3 n, + unsigned int batchSize, clFFT_Dimension dim, clFFT_Direction dir) +{ + FFTSetupD plan_vdsp; + DSPDoubleSplitComplex out_vdsp; + FFTDirection dir_vdsp = dir == clFFT_Forward ? FFT_FORWARD : FFT_INVERSE; + + unsigned int i, j, k; + unsigned int stride; + unsigned int log2Nx = (int) log2(n.x); + unsigned int log2Ny = (int) log2(n.y); + unsigned int log2Nz = (int) log2(n.z); + unsigned int log2N; + + log2N = log2Nx; + log2N = log2N > log2Ny ? log2N : log2Ny; + log2N = log2N > log2Nz ? log2N : log2Nz; + + plan_vdsp = vDSP_create_fftsetupD(log2N, 2); + + switch(dim) + { + case clFFT_1D: + + for(i = 0; i < batchSize; i++) + { + stride = i * n.x; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } + break; + + case clFFT_2D: + + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.y; j++) + { + stride = j * n.x + i * n.x * n.y; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.x; j++) + { + stride = j + i * n.x * n.y; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); + } + } + break; + + case clFFT_3D: + + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.z; j++) + { + for(k = 0; k < n.y; k++) + { + stride = k * n.x + j * n.x * n.y + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, 1, log2Nx, dir_vdsp); + } 
+ } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.z; j++) + { + for(k = 0; k < n.x; k++) + { + stride = k + j * n.x * n.y + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x, log2Ny, dir_vdsp); + } + } + } + for(i = 0; i < batchSize; i++) + { + for(j = 0; j < n.y; j++) + { + for(k = 0; k < n.x; k++) + { + stride = k + j * n.x + i * n.x * n.y * n.z; + out_vdsp.realp = out->real + stride; + out_vdsp.imagp = out->imag + stride; + + vDSP_fft_zipD(plan_vdsp, &out_vdsp, n.x*n.y, log2Nz, dir_vdsp); + } + } + } + break; + } + + vDSP_destroy_fftsetupD(plan_vdsp); +} + +double complexNormSq(clFFT_ComplexDouble a) +{ + return (a.real * a.real + a.imag * a.imag); +} + +double computeL2Error(clFFT_SplitComplex *data, clFFT_SplitComplexDouble *data_ref, int n, int batchSize, double *max_diff, double *min_diff) +{ + int i, j; + double avg_norm = 0.0; + *max_diff = 0.0; + *min_diff = 0x1.0p1000; + + for(j = 0; j < batchSize; j++) + { + double norm_ref = 0.0; + double norm = 0.0; + for(i = 0; i < n; i++) + { + int index = j * n + i; + clFFT_ComplexDouble diff = (clFFT_ComplexDouble) { data_ref->real[index] - data->real[index], data_ref->imag[index] - data->imag[index] }; + double norm_tmp = complexNormSq(diff); + norm += norm_tmp; + norm_ref += (data_ref->real[index] * data_ref->real[index] + data_ref->imag[index] * data_ref->imag[index]); + } + double curr_norm = sqrt( norm / norm_ref ) / FLT_EPSILON; + avg_norm += curr_norm; + *max_diff = *max_diff < curr_norm ? curr_norm : *max_diff; + *min_diff = *min_diff > curr_norm ? 
curr_norm : *min_diff;
+    }
+
+    return avg_norm / batchSize;
+}
+
+// Copies `length` interleaved complex samples from data_cl into the separate
+// real/imag arrays of result_split. Both destination arrays must already be
+// allocated by the caller with room for at least `length` floats.
+void convertInterleavedToSplit(clFFT_SplitComplex *result_split, clFFT_Complex *data_cl, int length)
+{
+    int i;
+    for(i = 0; i < length; i++) {
+        result_split->real[i] = data_cl[i].real;
+        result_split->imag[i] = data_cl[i].imag;
+    }
+}
+
+// Runs one FFT configuration on the file-level `context` and `queue`, logs the
+// achieved GFlops and compares the device result with computeReferenceD().
+//
+//   n          transform size in x, y and z
+//   batchSize  number of transforms per execution
+//   dir        transform direction passed to the clFFT execute call
+//   dim        dimensionality passed to clFFT_CreatePlan
+//   dataFormat split (planar) or interleaved complex layout
+//   numIter    number of timed executions of the plan
+//   testType   clFFT_OUT_OF_PLACE uses separate output buffers, otherwise the
+//              input buffers are reused as output
+//
+// Returns 0 when no error code was recorded, otherwise a non-zero code
+// (-1..-4 for host allocation failures, else the OpenCL/clFFT status).
+// An accuracy failure (diff_avg > eps_avg) is only logged; it does not change
+// the return value.
+int runTest(clFFT_Dim3 n, int batchSize, clFFT_Direction dir, clFFT_Dimension dim,
+            clFFT_DataFormat dataFormat, int numIter, clFFT_TestType testType)
+{
+    cl_int err = CL_SUCCESS;
+    int iter;
+    double t;
+
+    uint64_t t0, t1;
+    int mx = log2(n.x);
+    int my = log2(n.y);
+    int mz = log2(n.z);
+
+    int length = n.x * n.y * n.z * batchSize;
+
+    double gflops = 5e-9 * ((double)mx + (double)my + (double)mz) * (double)n.x * (double)n.y * (double)n.z * (double)batchSize * (double)numIter;
+
+    clFFT_SplitComplex data_i_split = (clFFT_SplitComplex) { NULL, NULL };
+    clFFT_SplitComplex data_cl_split = (clFFT_SplitComplex) { NULL, NULL };
+    clFFT_Complex *data_i = NULL;
+    clFFT_Complex *data_cl = NULL;
+    clFFT_SplitComplexDouble data_iref = (clFFT_SplitComplexDouble) { NULL, NULL };
+    clFFT_SplitComplexDouble data_oref = (clFFT_SplitComplexDouble) { NULL, NULL };
+
+    clFFT_Plan plan = NULL;
+    cl_mem data_in = NULL;
+    cl_mem data_out = NULL;
+    cl_mem data_in_real = NULL;
+    cl_mem data_in_imag = NULL;
+    cl_mem data_out_real = NULL;
+    cl_mem data_out_imag = NULL;
+
+    // Host-side input (data_i*) and result (data_cl*) storage.
+    if(dataFormat == clFFT_SplitComplexFormat) {
+        data_i_split.real = (float *) malloc(sizeof(float) * length);
+        data_i_split.imag = (float *) malloc(sizeof(float) * length);
+        data_cl_split.real = (float *) malloc(sizeof(float) * length);
+        data_cl_split.imag = (float *) malloc(sizeof(float) * length);
+        if(!data_i_split.real || !data_i_split.imag || !data_cl_split.real || !data_cl_split.imag)
+        {
+            err = -1;
+            log_error("Out-of-Resources\n");
+            goto cleanup;
+        }
+    }
+    else {
+        data_i = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
+        data_cl = (clFFT_Complex *) malloc(sizeof(clFFT_Complex)*length);
+        if(!data_i || !data_cl)
+        {
+            err = -2;
+            log_error("Out-of-Resouces\n");
+            goto cleanup;
+        }
+    }
+
+    // Double-precision copies of the input used for the host reference.
+    data_iref.real = (double *) malloc(sizeof(double) * length);
+    data_iref.imag = (double *) malloc(sizeof(double) * length);
+    data_oref.real = (double *) malloc(sizeof(double) * length);
+    data_oref.imag = (double *) malloc(sizeof(double) * length);
+    if(!data_iref.real || !data_iref.imag || !data_oref.real || !data_oref.imag)
+    {
+        err = -3;
+        log_error("Out-of-Resources\n");
+        goto cleanup;
+    }
+
+    // Fill the input with pseudo-random values in [-1, 1] and zero the output.
+    int i;
+    if(dataFormat == clFFT_SplitComplexFormat) {
+        for(i = 0; i < length; i++)
+        {
+            data_i_split.real[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
+            data_i_split.imag[i] = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
+            data_cl_split.real[i] = 0.0f;
+            data_cl_split.imag[i] = 0.0f;
+            data_iref.real[i] = data_i_split.real[i];
+            data_iref.imag[i] = data_i_split.imag[i];
+            data_oref.real[i] = data_iref.real[i];
+            data_oref.imag[i] = data_iref.imag[i];
+        }
+    }
+    else {
+        for(i = 0; i < length; i++)
+        {
+            data_i[i].real = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
+            data_i[i].imag = 2.0f * (float) rand() / (float) RAND_MAX - 1.0f;
+            data_cl[i].real = 0.0f;
+            data_cl[i].imag = 0.0f;
+            data_iref.real[i] = data_i[i].real;
+            data_iref.imag[i] = data_i[i].imag;
+            data_oref.real[i] = data_iref.real[i];
+            data_oref.imag[i] = data_iref.imag[i];
+        }
+    }
+
+    plan = clFFT_CreatePlan( context, n, dim, dataFormat, &err );
+    if(!plan || err)
+    {
+        log_error("clFFT_CreatePlan failed\n");
+        goto cleanup;
+    }
+
+    //clFFT_DumpPlan(plan, stdout);
+
+    // Device buffers. For in-place runs the output handles alias the input
+    // handles, so cleanup must release each underlying buffer only once.
+    if(dataFormat == clFFT_SplitComplexFormat)
+    {
+        data_in_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.real, &err);
+        if(!data_in_real || err)
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+
+        data_in_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_i_split.imag, &err);
+        if(!data_in_imag || err)
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+
+        if(testType == clFFT_OUT_OF_PLACE)
+        {
+            data_out_real = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.real, &err);
+            if(!data_out_real || err)
+            {
+                log_error("clCreateBuffer failed\n");
+                goto cleanup;
+            }
+
+            data_out_imag = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float), data_cl_split.imag, &err);
+            if(!data_out_imag || err)
+            {
+                log_error("clCreateBuffer failed\n");
+                goto cleanup;
+            }
+        }
+        else
+        {
+            data_out_real = data_in_real;
+            data_out_imag = data_in_imag;
+        }
+    }
+    else
+    {
+        // Check err as well as the handle, matching the split-format branch.
+        data_in = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_i, &err);
+        if(!data_in || err)
+        {
+            log_error("clCreateBuffer failed\n");
+            goto cleanup;
+        }
+        if(testType == clFFT_OUT_OF_PLACE)
+        {
+            data_out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, length*sizeof(float)*2, data_cl, &err);
+            if(!data_out || err)
+            {
+                log_error("clCreateBuffer failed\n");
+                goto cleanup;
+            }
+        }
+        else
+            data_out = data_in;
+    }
+
+
+    err = CL_SUCCESS;
+
+    // Timed section: numIter executions followed by a clFinish.
+    t0 = mach_absolute_time();
+    if(dataFormat == clFFT_SplitComplexFormat)
+    {
+        for(iter = 0; iter < numIter; iter++)
+            err |= clFFT_ExecutePlannar(queue, plan, batchSize, dir, data_in_real, data_in_imag, data_out_real, data_out_imag, 0, NULL, NULL);
+    }
+    else
+    {
+        for(iter = 0; iter < numIter; iter++)
+            err |= clFFT_ExecuteInterleaved(queue, plan, batchSize, dir, data_in, data_out, 0, NULL, NULL);
+    }
+
+    err |= clFinish(queue);
+
+    if(err)
+    {
+        log_error("clFFT_Execute\n");
+        goto cleanup;
+    }
+
+    t1 = mach_absolute_time();
+    t = subtractTimes(t1, t0);
+    char temp[100];
+    sprintf(temp, "GFlops achieved for n = (%d, %d, %d), batchsize = %d", n.x, n.y, n.z, batchSize);
+    log_perf(gflops / (float) t, 1, "GFlops/s", "%s", temp);
+
+    // Blocking reads of the device result into the host result arrays.
+    if(dataFormat == clFFT_SplitComplexFormat)
+    {
+        err |= clEnqueueReadBuffer(queue, data_out_real, CL_TRUE, 0, length*sizeof(float), data_cl_split.real, 0, NULL, NULL);
+        err |= clEnqueueReadBuffer(queue, data_out_imag, CL_TRUE, 0, length*sizeof(float), data_cl_split.imag, 0, NULL, NULL);
+    }
+    else
+    {
+        err |= clEnqueueReadBuffer(queue, data_out, CL_TRUE, 0, length*sizeof(float)*2, data_cl, 0, NULL, NULL);
+    }
+
+    if(err)
+    {
+        log_error("clEnqueueReadBuffer failed\n");
+        goto cleanup;
+    }
+
+    computeReferenceD(&data_oref, n, batchSize, dim, dir);
+
+    double diff_avg, diff_max, diff_min;
+    if(dataFormat == clFFT_SplitComplexFormat) {
+        diff_avg = computeL2Error(&data_cl_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
+        if(diff_avg > eps_avg)
+            log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
+        else
+            log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
+    }
+    else {
+        // computeL2Error takes split data, so convert the interleaved result.
+        clFFT_SplitComplex result_split;
+        result_split.real = (float *) malloc(length*sizeof(float));
+        result_split.imag = (float *) malloc(length*sizeof(float));
+        if(!result_split.real || !result_split.imag)
+        {
+            // Previously unchecked: a failed malloc was written through below.
+            err = -4;
+            log_error("Out-of-Resources\n");
+            free(result_split.real);
+            free(result_split.imag);
+            goto cleanup;
+        }
+        convertInterleavedToSplit(&result_split, data_cl, length);
+        diff_avg = computeL2Error(&result_split, &data_oref, n.x*n.y*n.z, batchSize, &diff_max, &diff_min);
+
+        if(diff_avg > eps_avg)
+            log_error("Test failed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
+        else
+            log_info("Test passed (n=(%d, %d, %d), batchsize=%d): %s Test: rel. L2-error = %f eps (max=%f eps, min=%f eps)\n", n.x, n.y, n.z, batchSize, (testType == clFFT_OUT_OF_PLACE) ? "out-of-place" : "in-place", diff_avg, diff_max, diff_min);
+        free(result_split.real);
+        free(result_split.imag);
+    }
+
+cleanup:
+    // plan is still NULL when a host allocation or plan creation failed.
+    if(plan)
+        clFFT_DestroyPlan(plan);
+    if(dataFormat == clFFT_SplitComplexFormat)
+    {
+        if(data_i_split.real)
+            free(data_i_split.real);
+        if(data_i_split.imag)
+            free(data_i_split.imag);
+        if(data_cl_split.real)
+            free(data_cl_split.real);
+        if(data_cl_split.imag)
+            free(data_cl_split.imag);
+
+        if(data_in_real)
+            clReleaseMemObject(data_in_real);
+        if(data_in_imag)
+            clReleaseMemObject(data_in_imag);
+        // Output handles are separate objects only for out-of-place runs;
+        // in-place they alias the input handles released just above.
+        if(data_out_real && testType == clFFT_OUT_OF_PLACE)
+            clReleaseMemObject(data_out_real);
+        // Fixed: the original tested the enum constant itself instead of
+        // comparing testType against it.
+        if(data_out_imag && testType == clFFT_OUT_OF_PLACE)
+            clReleaseMemObject(data_out_imag);
+    }
+    else
+    {
+        if(data_i)
+            free(data_i);
+        if(data_cl)
+            free(data_cl);
+
+        if(data_in)
+            clReleaseMemObject(data_in);
+        if(data_out && testType == clFFT_OUT_OF_PLACE)
+            clReleaseMemObject(data_out);
+    }
+
+    if(data_iref.real)
+        free(data_iref.real);
+    if(data_iref.imag)
+        free(data_iref.imag);
+    if(data_oref.real)
+        free(data_oref.real);
+    if(data_oref.imag)
+        free(data_oref.imag);
+
+    return err;
+}
+
+// Returns true when the line contains "//" at any position, not only at the
+// start; main() skips every such line of the parameter file.
+bool ifLineCommented(const char *line) {
+    const char *Line = line;
+    while(*Line != '\0')
+        if((*Line == '/') && (*(Line + 1) == '/'))
+            return true;
+        else
+            Line++;
+    return false;
+}
+
+// Maps the CL_DEVICE_TYPE environment variable to an OpenCL device type.
+// Accepted values are "gpu", "cpu", "accelerator" and the CL_DEVICE_TYPE_*
+// spellings (including CL_DEVICE_TYPE_DEFAULT). An unset or unrecognised
+// value yields CL_DEVICE_TYPE_GPU.
+cl_device_type getGlobalDeviceType()
+{
+    char *force_cpu = getenv( "CL_DEVICE_TYPE" );
+    if( force_cpu != NULL )
+    {
+        if( strcmp( force_cpu, "gpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_GPU" ) == 0 )
+            return CL_DEVICE_TYPE_GPU;
+        else if( strcmp( force_cpu, "cpu" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_CPU" ) == 0 )
+            return CL_DEVICE_TYPE_CPU;
+        else if( strcmp( force_cpu, "accelerator" ) == 0 || strcmp( force_cpu, "CL_DEVICE_TYPE_ACCELERATOR" ) == 0 )
+            return CL_DEVICE_TYPE_ACCELERATOR;
+        else if( strcmp( force_cpu, "CL_DEVICE_TYPE_DEFAULT" ) == 0 )
+            return CL_DEVICE_TYPE_DEFAULT;
+    }
+    // default
+    return CL_DEVICE_TYPE_GPU;
+}
+
+// Logs the error text it is handed; the other arguments are ignored.
+// main() below passes NULL as the context callback, so nothing visible here
+// registers this function.
+void
+notify_callback(const char *errinfo, const void *private_info, size_t cb, void *user_data)
+{
+    log_error( "%s\n", errinfo );
+}
+
+// Estimates the device memory a run needs, in MiB, and compares it with
+// gMemSize (main() passes the device's global memory size already divided by
+// 1024*1024). The estimate counts 3 buffers of n.x*n.y*n.z*batchSize complex
+// values for out-of-place runs and 2 for in-place runs.
+// Returns -1 when the estimate is >= gMemSize, 0 otherwise.
+int
+checkMemRequirements(clFFT_Dim3 n, int batchSize, clFFT_TestType testType, cl_ulong gMemSize)
+{
+    cl_ulong memReq = (testType == clFFT_OUT_OF_PLACE) ? 3 : 2;
+    // Widen before multiplying: the product of the three dimensions can
+    // exceed the range of int for large transforms.
+    memReq *= (cl_ulong) n.x * (cl_ulong) n.y * (cl_ulong) n.z * sizeof(clFFT_Complex) * (cl_ulong) batchSize;
+    memReq = memReq/1024/1024;
+    if(memReq >= gMemSize)
+        return -1;
+    return 0;
+}
+
+// Test driver. Picks the first available GPU device, creates the file-level
+// context and queue, then runs one test per non-comment line of the parameter
+// file named by argv[1]. Options not given on a line keep the value left by
+// the previous line. Only the single-argument form is handled; with more
+// arguments no test is run.
+// Returns the number of lines that failed to run (or were malformed), or a
+// negative code when setup fails.
+int main (int argc, char * const argv[]) {
+
+    test_start();
+
+    cl_ulong gMemSize;
+    clFFT_Direction dir = clFFT_Forward;
+    int numIter = 1;
+    clFFT_Dim3 n = { 1024, 1, 1 };
+    int batchSize = 1;
+    clFFT_DataFormat dataFormat = clFFT_SplitComplexFormat;
+    clFFT_Dimension dim = clFFT_1D;
+    clFFT_TestType testType = clFFT_OUT_OF_PLACE;
+    cl_device_id device_ids[16];
+
+    FILE *paramFile;
+
+    cl_int err;
+    unsigned int num_devices;
+
+    cl_device_type device_type = getGlobalDeviceType();
+    if(device_type != CL_DEVICE_TYPE_GPU)
+    {
+        log_info("Test only supported on DEVICE_TYPE_GPU\n");
+        test_finish();
+        exit(0);
+    }
+
+    // num_entries is a count of array elements, not a size in bytes.
+    const unsigned int max_devices = sizeof(device_ids) / sizeof(device_ids[0]);
+    err = clGetDeviceIDs(NULL, device_type, max_devices, device_ids, &num_devices);
+    if(err)
+    {
+        log_error("clGetComputeDevice failed\n");
+        test_finish();
+        return -1;
+    }
+    // num_devices reports every matching device, which can be more than the
+    // number of ids actually stored in device_ids.
+    if(num_devices > max_devices)
+        num_devices = max_devices;
+
+    device_id = NULL;
+
+    unsigned int i;
+    for(i = 0; i < num_devices; i++)
+    {
+        // Initialised so a failed query is treated as "not available".
+        cl_bool available = CL_FALSE;
+        err = clGetDeviceInfo(device_ids[i], CL_DEVICE_AVAILABLE, sizeof(cl_bool), &available, NULL);
+        if(err)
+        {
+            log_error("Cannot check device availability of device # %d\n", i);
+        }
+
+        if(available)
+        {
+            device_id = device_ids[i];
+            break;
+        }
+        else
+        {
+            char name[200];
+            err = clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(name), name, NULL);
+            if(err == CL_SUCCESS)
+            {
+                log_info("Device %s not available for compute\n", name);
+            }
+            else
+            {
+                log_info("Device # %d not available for compute\n", i);
+            }
+        }
+    }
+
+    if(!device_id)
+    {
+        log_error("None of the devices available for compute ... aborting test\n");
+        test_finish();
+        return -1;
+    }
+
+    context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
+    if(!context || err)
+    {
+        log_error("clCreateContext failed\n");
+        test_finish();
+        return -1;
+    }
+
+    queue = clCreateCommandQueue(context, device_id, 0, &err);
+    if(!queue || err)
+    {
+        log_error("clCreateCommandQueue() failed.\n");
+        clReleaseContext(context);
+        test_finish();
+        return -1;
+    }
+
+    err = clGetDeviceInfo(device_id, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &gMemSize, NULL);
+    if(err)
+    {
+        log_error("Failed to get global mem size\n");
+        clReleaseContext(context);
+        clReleaseCommandQueue(queue);
+        test_finish();
+        return -2;
+    }
+
+    // From here on gMemSize is in MiB, as checkMemRequirements() expects.
+    gMemSize /= (1024*1024);
+
+    char delim[] = " \n";
+    char tmpStr[100];
+    char line[200];
+    char *param, *val;
+    int total_errors = 0;
+    if(argc == 1) {
+        log_error("Need file name with list of parameters to run the test\n");
+        // The context and queue already exist at this point; release them
+        // like the other early exits do.
+        clReleaseContext(context);
+        clReleaseCommandQueue(queue);
+        test_finish();
+        return -1;
+    }
+
+    if(argc == 2) { // arguments are supplied in a file with arguments for a single run are all on the same line
+        paramFile = fopen(argv[1], "r");
+        if(!paramFile) {
+            log_error("Cannot open the parameter file\n");
+            clReleaseContext(context);
+            clReleaseCommandQueue(queue);
+            test_finish();
+            return -3;
+        }
+        while(fgets(line, 199, paramFile)) {
+            if(!strcmp(line, "") || !strcmp(line, "\n") || ifLineCommented(line))
+                continue;
+            // Cleared when an option on this line is missing its value(s);
+            // strtok() returns NULL then and must not reach sscanf().
+            bool lineOk = true;
+            param = strtok(line, delim);
+            while(param) {
+                val = strtok(NULL, delim);
+                if(!val) {
+                    lineOk = false;
+                    break;
+                }
+                if(!strcmp(param, "-n")) {
+                    // "-n" takes three values: x, y and z.
+                    char *valY = strtok(NULL, delim);
+                    char *valZ = strtok(NULL, delim);
+                    if(!valY || !valZ) {
+                        lineOk = false;
+                        break;
+                    }
+                    sscanf(val, "%d", &n.x);
+                    sscanf(valY, "%d", &n.y);
+                    sscanf(valZ, "%d", &n.z);
+                }
+                else if(!strcmp(param, "-batchsize"))
+                    sscanf(val, "%d", &batchSize);
+                // The %99s width keeps a long token inside tmpStr[100].
+                else if(!strcmp(param, "-dir")) {
+                    sscanf(val, "%99s", tmpStr);
+                    if(!strcmp(tmpStr, "forward"))
+                        dir = clFFT_Forward;
+                    else if(!strcmp(tmpStr, "inverse"))
+                        dir = clFFT_Inverse;
+                }
+                else if(!strcmp(param, "-dim")) {
+                    sscanf(val, "%99s", tmpStr);
+                    if(!strcmp(tmpStr, "1D"))
+                        dim = clFFT_1D;
+                    else if(!strcmp(tmpStr, "2D"))
+                        dim = clFFT_2D;
+                    else if(!strcmp(tmpStr, "3D"))
+                        dim = clFFT_3D;
+                }
+                else if(!strcmp(param, "-format")) {
+                    sscanf(val, "%99s", tmpStr);
+                    if(!strcmp(tmpStr, "plannar"))
+                        dataFormat = clFFT_SplitComplexFormat;
+                    else if(!strcmp(tmpStr, "interleaved"))
+                        dataFormat = clFFT_InterleavedComplexFormat;
+                }
+                else if(!strcmp(param, "-numiter"))
+                    sscanf(val, "%d", &numIter);
+                else if(!strcmp(param, "-testtype")) {
+                    sscanf(val, "%99s", tmpStr);
+                    if(!strcmp(tmpStr, "out-of-place"))
+                        testType = clFFT_OUT_OF_PLACE;
+                    else if(!strcmp(tmpStr, "in-place"))
+                        testType = clFFT_IN_PLACE;
+                }
+                param = strtok(NULL, delim);
+            }
+
+            if(!lineOk) {
+                log_error("Malformed parameter line: option without a value\n");
+                total_errors++;
+                continue;
+            }
+
+            if(checkMemRequirements(n, batchSize, testType, gMemSize)) {
+                log_info("This test cannot run because memory requirements canot be met by the available device\n");
+                continue;
+            }
+
+            err = runTest(n, batchSize, dir, dim, dataFormat, numIter, testType);
+            if (err)
+                total_errors++;
+        }
+        fclose(paramFile);
+    }
+
+    clReleaseContext(context);
+    clReleaseCommandQueue(queue);
+
+    test_finish();
+    return total_errors;
+}
diff --git a/RTCP/GPUProc/OpenCL_FFT/src/param.txt b/RTCP/GPUProc/OpenCL_FFT/src/param.txt
new file mode 100644
index 0000000000000000000000000000000000000000..595402aa437b07baa8936280909927a09eb6f6dd
--- /dev/null
+++ b/RTCP/GPUProc/OpenCL_FFT/src/param.txt
@@ -0,0 +1,57 @@
+
+//
+// File: param.txt
+//
+// Version: <1.0>
+//
+// Disclaimer: IMPORTANT: This Apple software is supplied to you by Apple Inc. ("Apple")
+// in consideration of your agreement to the following terms, and your use,
+// installation, modification or redistribution of this Apple software
+// constitutes acceptance of these terms. If you do not agree with these
+// terms, please do not use, install, modify or redistribute this Apple
+// software.
+// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+// +// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR +// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION +// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER +// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR +// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Copyright ( C ) 2008 Apple Inc. All Rights Reserved. +// +//////////////////////////////////////////////////////////////////////////////////////////////////// + + +-n 64 1 1 -batchsize 8192 -dir forward -dim 1D -format plannar -numiter 1000 -testtype out-of-place +-n 1024 1 1 -batchsize 8192 -dir forward -dim 1D -format plannar -numiter 1000 -testtype out-of-place +-n 1048576 1 1 -batchsize 4 -dir inverse -dim 1D -format interleaved -numiter 1000 -testtype out-of-place +-n 1024 512 1 -batchsize 8 -dir forward -dim 2D -format interleaved -numiter 1000 -testtype out-of-place +-n 128 128 128 -batchsize 1 -dir inverse -dim 3D -format interleaved -numiter 1000 -testtype out-of-place +-n 16384 1 1 -batchsize 4 -dir forward -dim 1D -format interleaved -numiter 1 -testtype in-place +-n 32 2048 1 -batchsize 8 -dir forward -dim 2D -format interleaved -numiter 1 -testtype in-place +-n 4096 64 1 -batchsize 4 -dir inverse -dim 2D -format plannar -numiter 1 -testtype in-place +-n 64 32 16 -batchsize 1 -dir inverse -dim 3D -format interleaved -numiter 1 -testtype out-of-place diff --git a/RTCP/GPUProc/OpenCL_FFT/src/procs.h b/RTCP/GPUProc/OpenCL_FFT/src/procs.h new file mode 100644 index 0000000000000000000000000000000000000000..f6028c9e5218900accc97fa3dddc19fed885e7df --- /dev/null +++ b/RTCP/GPUProc/OpenCL_FFT/src/procs.h @@ -0,0 +1,53 @@ + +// +// File: procs.h +// +// Version: <1.0> +// +// Disclaimer: IMPORTANT: This Apple 
software is supplied to you by Apple Inc. ("Apple") +// in consideration of your agreement to the following terms, and your use, +// installation, modification or redistribution of this Apple software +// constitutes acceptance of these terms. If you do not agree with these +// terms, please do not use, install, modify or redistribute this Apple +// software. +// +// In consideration of your agreement to abide by the following terms, and +// subject to these terms, Apple grants you a personal, non - exclusive +// license, under Apple's copyrights in this original Apple software ( the +// "Apple Software" ), to use, reproduce, modify and redistribute the Apple +// Software, with or without modifications, in source and / or binary forms; +// provided that if you redistribute the Apple Software in its entirety and +// without modifications, you must retain this notice and the following text +// and disclaimers in all such redistributions of the Apple Software. Neither +// the name, trademarks, service marks or logos of Apple Inc. may be used to +// endorse or promote products derived from the Apple Software without specific +// prior written permission from Apple. Except as expressly stated in this +// notice, no other rights or licenses, express or implied, are granted by +// Apple herein, including but not limited to any patent rights that may be +// infringed by your derivative works or by other works in which the Apple +// Software may be incorporated. +// +// The Apple Software is provided by Apple on an "AS IS" basis. APPLE MAKES NO +// WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION THE IMPLIED +// WARRANTIES OF NON - INFRINGEMENT, MERCHANTABILITY AND FITNESS FOR A +// PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND OPERATION +// ALONE OR IN COMBINATION WITH YOUR PRODUCTS. 
+//
+// IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL OR
+// CONSEQUENTIAL DAMAGES ( INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION ) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION, MODIFICATION
+// AND / OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED AND WHETHER
+// UNDER THEORY OF CONTRACT, TORT ( INCLUDING NEGLIGENCE ), STRICT LIABILITY OR
+// OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Copyright ( C ) 2008 Apple Inc. All Rights Reserved.
+//
+////////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#define test_start()   // expands to nothing
+#define log_perf(_number, _higherBetter, _numType, _format, ...) printf("Performance Number " _format " (in %s, %s): %g\n",##__VA_ARGS__, _numType, (_higherBetter)?"higher is better":"lower is better" , _number)   // _higherBetter is parenthesised so an expression argument cannot rebind against ?:
+#define log_info printf    // informational output goes to stdout via printf
+#define log_error printf   // error output also goes to stdout via printf
+#define test_finish()   // expands to nothing
diff --git a/RTCP/GPUProc/clAmdFft/appmlEnv.sh b/RTCP/GPUProc/clAmdFft/appmlEnv.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1752540a870c8cc213a24d0610241bec79af922d
--- /dev/null
+++ b/RTCP/GPUProc/clAmdFft/appmlEnv.sh
@@ -0,0 +1,24 @@
+#! /bin/bash
+# Short script meant to automate the task of setting up a terminal window to
+# use the APPML library
+
+# Verify that this script has been sourced, not directly executed
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]
+then
+    echo "This script is meant to be sourced '.', as it modifies environmental variables"
+    echo "Try running as: '. $(basename "${0}")'"
+    exit
+fi
+
+# Resolve the directory of this script; quoted so paths with spaces survive
+scriptDir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
+# echo Script dir is: ${scriptDir}
+
+# Glob match (path quoted, so literal) to see whether LD_LIBRARY_PATH already points to APPML
+if [[ ${LD_LIBRARY_PATH} = *"${scriptDir}/lib64:${scriptDir}/lib32"* ]]
+then
+    echo "APPML math libraries is set in LD_LIBRARY_PATH"
+else
+    echo "Patching LD_LIBRARY_PATH to include APPML math libraries"
+    export LD_LIBRARY_PATH="${scriptDir}/lib64:${scriptDir}/lib32${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
+fi
diff --git a/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client b/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client
new file mode 120000
index 0000000000000000000000000000000000000000..4c429f2ba6b154932f86938a7f85885fd7c7d73a
--- /dev/null
+++ b/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client
@@ -0,0 +1 @@
+clAmdFft.Client-1.8.291
\ No newline at end of file
diff --git a/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client-1.8.291 b/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client-1.8.291
new file mode 100755
index 0000000000000000000000000000000000000000..7f34771aed6d08c2fc6704228a854c21f9a9aa74
Binary files /dev/null and b/RTCP/GPUProc/clAmdFft/bin32/clAmdFft.Client-1.8.291 differ
diff --git a/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client b/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client
new file mode 120000
index 0000000000000000000000000000000000000000..4c429f2ba6b154932f86938a7f85885fd7c7d73a
--- /dev/null
+++ b/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client
@@ -0,0 +1 @@
+clAmdFft.Client-1.8.291
\ No newline at end of file
diff --git a/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client-1.8.291 b/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client-1.8.291
new file mode 100755
index 0000000000000000000000000000000000000000..8dbf34568a62e89262fe165bfe7edf5a621406d8
Binary files /dev/null and b/RTCP/GPUProc/clAmdFft/bin64/clAmdFft.Client-1.8.291 differ
diff --git a/RTCP/GPUProc/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz
b/RTCP/GPUProc/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..8f495d1388576ab1abf15db4727c2a7c1a0d2f35 Binary files /dev/null and b/RTCP/GPUProc/clAmdFft/clAmdFft-1.8.291-Linux.tar.gz differ diff --git a/RTCP/GPUProc/clAmdFft/clAmdFft-EULA.txt b/RTCP/GPUProc/clAmdFft/clAmdFft-EULA.txt new file mode 100644 index 0000000000000000000000000000000000000000..8cf752a15d2d961d9c640ad0bd67efd695c150a3 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/clAmdFft-EULA.txt @@ -0,0 +1,402 @@ +LICENSE AGREEMENT + +IMPORTANT-READ CAREFULLY: Do not install, copy or use the enclosed Materials +(defined below) until carefully reading and agreeing to the following terms +and conditions. This is a legal agreement ("Agreement") between you (either +an individual or an entity) ("You") and Advanced Micro Devices, Inc. +("AMD"). If You do not agree to the terms of this Agreement, do not install, +copy or use the Materials or any portion thereof. By installing, copying or +using the Materials provided herewith or that is made available by AMD to +download from any media, You agree to all of the terms of this Agreement. +Note that these Materials are AMD Confidential Information and may not be +shared with any third party except as expressly provided below. + +1. DEFINITIONS. + +In addition to those definitions set forth elsewhere in this Agreement, the +following terms have the meanings specified below: +a) "Distributed Software" means software developed or modified by You either +statically linked to Libraries or dynamically linked to Runtimes, and/or +derivative works of the Sample Source or modifiable Documentation. +b) "Documentation" means associated install scripts and online or electronic +documentation included as part of the deliverables in the Materials, or other +related materials or any portion thereof. 
+c) "Free Software License" means any software license that requires as a +condition of use, modification, adaptation or distribution of such licensed +software that other software derived from, distributed with or incorporated +into at the source code level be disclosed or distributed in Source Code +form. By way of example, Free Software License includes, but is in no way +limited to any of the following licenses or distribution models, or licenses +or distribution models similar to any of the following: (i) GNU's General +Public License (GPL) or Lesser/Library GPL (LGPL), (ii) The Artistic License +(e.g., PERL), (iii) the Mozilla Public License, (iv) the Netscape Public +License, (v) the Sun Community Source License (SCSL), and (vi) the Sun +Industry Standards Source License (SISSL). +d) "Intellectual Property Rights" means any rights under any patents, +trademarks, copyrights, mask works, trade secret information, intellectual +property, license or similar materials. +e) "Libraries" means libraries in Object Code included as part of the +deliverables in the Materials that may be statically linked into Your +software for the Licensed Purpose. +f) "Licensed Purpose" means: (i) test and evaluate the Materials internally; +(ii) use of the Materials to create Distributed Software; and (iii) +distributing and sublicensing to end users the Distributed Software and +Runtimes. +g) "Materials" means AMD Advanced Parallel Processing Math Library (APPML), +including but not limited to Documentation, Libraries, Runtimes, Object Code, +Sample Source and Tools. +h) "Object Code" means machine readable computer programming code files, +which is not in a human readable form and which does not include debug +symbols similar in detail to Source Code. +i) "Runtimes" means programs or dynamically linked libraries in Object Code +which are included as part of the deliverables in the Materials. 
+j) "Sample Source" means header files and sample code in Source Code form +which are included as part of the deliverables in the Materials. +k) "Tools" means any tools or utilities in the Materials. +l) "Source Code" means human readable form computer programming code and +related system level documentation, including all comments, symbols and any +procedural code such as job control language. + +2. LICENSE. Subject to the terms and conditions of this Agreement, AMD +hereby grants You a non-exclusive, royalty-free, revocable, non-transferable, +non-assignable limited copyright license to: +a) install, use and reproduce the Materials internally at Your site(s) +solely for the purpose of internal testing and evaluation; +b) modify the Sample Source or Documentation to create Distributed Software; +c) statically link the Libraries or dynamically link Runtimes to Your +Software; and +d) distribute and sublicense to end users in Object Code form only the +Distributed Software and Runtimes for the Licensed Purpose. Your right to +distribute the Distributed Software and Runtimes to end users includes the +right to distribute through distributors including multiple layers of +distributors. + +3. REQUIREMENTS. You will sublicense the end users to use Distributed +Software, Libraries and Runtimes in accordance with terms and conditions that +are substantially similar to the terms and conditions contained in Schedule A +hereof. You may include these terms in Your standard form agreement. You +must reproduce all AMD trademark and/or copyright notices on any copy of the +Distributed Software and Runtimes that You distribute. + +4. RESTRICTIONS. Restrictions regarding Your use of the Materials are as +follows. 
You may not: +a) distribute, publish or sublicense the Documentation, the Sample Source, +the Libraries (except when built into the Distributed Software), the Tools or +any Source Code in the Materials to anyone; +b) reproduce copies of the Materials other than what is reasonably required +for the Licensed Purpose; +c) decompile, reverse engineer, disassemble or otherwise reduce the Object +Code contained in the Materials to a human-perceivable form; +d) alter any copyright, trademark or patent notice(s) in the Materials; +e) use AMD's trademarks in Your software or product names or in a way that +suggests the Distributed Software comes from AMD or is endorsed by AMD; +f) use AMD's trademarks in Your software or product names or in a way that +that suggests that any of the Materials are endorsed by AMD; +g) include contents in malicious, deceptive or unlawful programs; +h) modify and/or distribute any of the Materials so that any part of thereof +becomes subject to a Free Software License; +i) use the Materials to enable, support or otherwise aid You or a third +party to develop technology competitive with the AMD technology embodied in +Materials or relating to the AMD products; or +j) rent, lease or lend the Materials or transfer the Materials to any third +party except as expressly provided herein. + +You also agree that the Materials are licensed, not sold by AMD. + +Except as expressly provided in Section 2, AMD does not grant, by +implication, estoppel or otherwise any other Intellectual Property Rights. +You agree that all licenses granted herein are conditioned upon the use of +the Materials for the Licensed Purpose. You agree that the Materials and all +partial versions thereto, including without limitation all modifications, +enhancements, updates, bug fixes, inventions, know-how, as well as all +Intellectual Property Rights and all other information relating thereto are +and will remain the sole and exclusive property of AMD. 
You shall have no +right, title or interest therein except for the limited licenses set forth in +Section 2 of this Agreement. AMD agrees that the foregoing shall not grant +AMD any right, title or interest in Your Distributed Software that is not +provided as part of the Materials, and Intellectual Property Rights therein +are and will remain Your sole and exclusive property. Nothing in this +Agreement shall be construed to limit AMD's right to independently develop or +acquire software or products similar to those of Your software or products +including any Intellectual Property Rights therein. + +The Materials may include third party technologies (e.g. third party +libraries) for which You must obtain licenses from parties other than AMD. +You agree that AMD has not obtained or conveyed to You--and that You shall be +responsible for obtaining--Intellectual Property Rights to use and/or +distribute the applicable, underlying Intellectual Property Rights related to +the third party technologies. These third party technologies are not +licensed as part of the Materials and are not licensed under this Agreement. + +Without limiting Section 10, You agree that AMD has no duty to defend You for +any infringement claims related to the standards and third party +technologies. You agree to indemnify and hold AMD harmless for any costs or +damages that result from such claims. + +5. NO SUPPORT. AMD is under no obligation to provide any kind of technical, +development or end-user support for the Materials. + +6. UPDATES. AMD may provide updates from time to time. If AMD provides +updates, these updates are licensed under the terms of this Agreement. + +7. FEEDBACK. You have no obligation to give AMD any suggestions, comments +or other feedback ("Feedback") relating to the Materials. However, AMD may +use and include any Feedback that You provide to improve the Materials or +other related AMD products and technologies. 
You grant AMD and its +affiliates and subsidiaries a worldwide, non-exclusive, irrevocable, +royalty-free, perpetual license to, directly or indirectly, use, reproduce, +license, sublicense, distribute, make, have made, sell and otherwise +commercialize the Feedback in the Materials or other AMD technologies. You +further agree not to provide any Feedback that (a) You know is subject to any +patent, copyright or other intellectual property claim or right of any third +party; (b) is subject to a Free Software License; or (c) is subject to +license terms which seek to require any products incorporating or derived +from such Feedback, or other AMD intellectual property, to be licensed to or +otherwise shared with any third party. + +8. CONFIDENTIALITY. You shall refrain from disclosing any Confidential +Information to third parties and will take reasonable security precautions, +at least as great as the precautions it takes to protect its own confidential +information, but no less than reasonable care, to keep confidential the +Confidential Information. For the purposes hereof, "Confidential +Information" means all information disclosed between the parties in +connection with this Agreement, including the Materials and any other +business or technical information provided to You by AMD. You will only +disclose the Confidential Information to Your employees or on-site +subcontractors (a) who have a need to know in furtherance of the Licensed +Purpose; and (b) who have signed a confidentiality agreement with You at +least as restrictive as this Agreement. If at any future time AMD, directly +or indirectly, discloses any other related technology or information to You, +including without limitation any updated versions of the Materials, such +disclosure will also be deemed to be confidential, part of the Materials and +will be subject to the provisions of this Agreement. 
You may disclose +Confidential Information in accordance with a judicial or other governmental +order, provided that You give AMD reasonable notice prior to such disclosure +to allow AMD a reasonable opportunity to seek a protective order or equivalent. + +9. DISCLAIMER OF WARRANTY. YOU EXPRESSLY ACKNOWLEDGES AND AGREES THAT USE +OF THE MATERIALS ARE AT YOUR SOLE RISK. THE MATERIALS ARE PROVIDED "AS IS" +AND WITHOUT WARRANTY OF ANY KIND AND AMD EXPRESSLY DISCLAIMS ALL WARRANTIES, +EXPRESS AND IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, +OR THOSE ARISING FROM CUSTOM OF TRADE OR COURSE OF USAGE. AMD DOES NOT +WARRANT THAT THE MATERIALS WILL MEET YOUR REQUIREMENTS, OR THAT THE OPERATION +OF THE MATERIALS WILL BE UNINTERRUPTED OR ERROR-FREE. THE ENTIRE RISK +ASSOCIATED WITH THE USE OF THE MATERIALS IS ASSUMED BY YOU. FURTHERMORE, AMD +DOES NOT WARRANT OR MAKE ANY REPRESENTATIONS REGARDING THE USE OR THE RESULTS +OF THE USE OF THE MATERIALS IN TERMS OF THEIR CORRECTNESS, ACCURACY, +RELIABILITY, CURRENTNESS, OR OTHERWISE. SHOULD THE CONTENTS OF THE MATERIALS +PROVE DEFECTIVE, YOU ASSUME THE ENTIRE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION OF +IMPLIED WARRANTIES, SO THE ABOVE EXCLUSION MAY NOT APPLY TO YOU. + +10. LIMITATION OF LIABILITY AND INDEMNIFICATION. 
IN NO EVENT SHALL, SHALL +AMD, OR ITS DIRECTORS, OFFICERS, EMPLOYEES OR AGENTS ("AUTHORIZED +REPRESENTATIVES"), ITS SUPPLIERS OR ITS LICENSORS, BE LIABLE TO YOU FOR ANY +PUNITIVE, DIRECT, INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES +(INCLUDING DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS +OF BUSINESS INFORMATION, AND THE LIKE) ARISING OUT OF THE USE, MISUSE OR +INABILITY TO USE THE MATERIALS, BREACH OR DEFAULT, INCLUDING THOSE ARISING +FROM INFRINGEMENT OR ALLEGED INFRINGEMENT OF ANY PATENT, TRADEMARK, COPYRIGHT +OR OTHER INTELLECTUAL PROPERTY RIGHT, BY AMD, EVEN IF AMD AND/OR ITS +AUTHORIZED REPRESENTATIVES HAVE BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. AMD WILL NOT BE LIABLE FOR LOSS OF, OR DAMAGE TO, YOUR EQUIPMENT, +RECORDS OR DATA OR ANY DAMAGES CLAIMED BY YOU BASED ON ANY THIRD PARTY +CLAIM. IN NO EVENT SHALL AMD'S TOTAL LIABILITY TO YOU FOR ALL DAMAGES, +LOSSES, AND CAUSES OF ACTION WHETHER IN CONTRACT, TORT (INCLUDING NEGLIGENCE) +EXCEED THE AMOUNT OF $10 USD. BY USING THE MATERIALS WITHOUT CHARGE, YOU +ACCEPT THIS ALLOCATION OF RISK. YOU AGREE TO DEFEND, INDEMNIFY AND HOLD +HARMLESS AMD AND ANY OF ITS AUTHORIZED REPRESENTATIVES FROM AND AGAINST ANY +AND ALL LOSS, DAMAGE, LIABILITY AND OTHER EXPENSES (INCLUDING REASONABLE +ATTORNEYS' FEES), ARISING OUT OF OR IN CONNECTION WITH ANY BREACH OF YOUR +OBLIGATIONS UNDER THIS AGREEMENT. + +11. TERMINATION. This Agreement is effective until terminated. You can +terminate this Agreement at any time by destroying the Materials, and all +copies You have made. This Agreement will terminate immediately without +notice from AMD if You fail to comply with any provision of this Agreement. +Upon termination You must destroy the Materials and all copies You have +made. 
The termination of this Agreement shall: (i) immediately result in the +termination of all sublicenses previously granted by You to third party +distributors and contract manufacturers under Section 3; and (ii) have no +effect on any sublicenses previously granted by You to end users under +Subsection 3, which sublicenses shall survive in accordance with their terms. + +12. GOVERNMENT END USERS. If You are acquiring the Materials on behalf of +any unit or agency of the United States Government, the following provisions +apply. The Government agrees the Materials were developed at private expense +and are provided with "RESTRICTED RIGHTS". Use, duplication, or disclosure +by the Government is subject to restrictions as set forth in DFARS +227.7202-1(a) and 227.7202-3(a) (1995), DFARS 252.227-7013(c) (1) (ii) (Oct +1988), FAR 12.212(a) (1995), FAR 52.227-19, (June 1987) or FAR 52.227-14(ALT +III) (June 1987), as amended from time to time. In the event that this +Agreement, or any part thereof, is deemed inconsistent with the minimum +rights identified in the Restricted Rights provisions, the minimum rights +shall prevail. + +13. EXPORT RESTRICTIONS. You shall adhere to all U.S. and other applicable +export laws, including but not limited to the U.S. Export Administration +Regulations ("EAR"), currently found at 15 C.F.R. Sections 730 through 744. 
+Further, pursuant to 15 C.F.R Section 740.6, You hereby certifies that, +except pursuant to a license granted by the United States Department of +Commerce Bureau of Industry and Security or as otherwise permitted pursuant +to a License Exception under the EAR, You will not (1) export, re-export or +release to a national of a country in Country Groups D:1 or E:2 any +restricted technology, software, or source code it receives from AMD, or (2) +export to Country Groups D:1 or E:2 the direct product of such technology or +software, if such foreign produced direct product is subject to national +security controls as identified on the Commerce Control List (currently found +in Supplement 1 to Part 774 of EAR). For the most current Country Group +listings, or for additional information about the EAR or Your obligations +under those regulations, please refer to the U.S. Bureau of Industry and +Security's website at http://www.bis.doc.gov/. These export requirements +shall survive any expiration or termination of this Agreement. + +14. CONTROLLING LAW AND SEVERABILITY. This Agreement will be governed by and +construed under the laws of the State of California without reference to its +conflicts of law principles. The rights and obligations under this Agreement +shall not be governed by the United Nations Convention on Contracts or the +International Sale of Goods, the application of which is expressly excluded. +Each party hereto submits to the jurisdiction of the state and federal courts +of Santa Clara County and the Northern District of California for the purpose +of all legal proceedings arising out of or relating to this Agreement or the +subject matter hereof. Each party waives any objection which it may have to +contest such forum. + +15. SURVIVING OBLIGATIONS. Sections 1, 3-16, inclusive, shall survive any +termination of this Agreement and shall bind the parties and their legal +representatives, successors, heirs and assigns. + +16. COMPLETE AGREEMENT. 
This Agreement constitutes the entire agreement +between the parties and supersedes any prior or contemporaneous oral or +written agreements with respect to the subject matter of this Agreement. No +waiver, amendment or modification of any provision of this Agreement will be +effective unless in writing and signed by the party against whom enforcement +is sought. + +If You agree to abide by the terms and conditions of this Agreement, please +press "Accept." If You do not agree to abide by the terms and conditions of +this Agreement and press "Decline," You may not use the Materials. +SCHEDULE A +END USER LICENSE AGREEMENT +PLEASE READ THIS LICENSE CAREFULLY BEFORE USING THE SOFTWARE. BY USING THE +SOFTWARE, YOU ARE AGREEING TO BE BOUND BY THE TERMS OF THIS LICENSE. IF YOU +DO NOT AGREE TO THESE TERMS AND CONDITIONS, DO NOT USE THE SOFTWARE. +1. License. The software accompanying this License (hereinafter "Software"), +regardless of the media on which it is distributed, are licensed to you by +Advanced Micro Devices, Inc. ("AMD"). You own the medium on which the +Software is recorded, but AMD and AMD's Licensors (referred to collectively +as "AMD") retain title to the Software and related documentation. You may: +a) use the Software.; and +b) make a reasonable number of copies necessary for the purposes of this +License. You must reproduce on such copy AMD's copyright notice and any +other proprietary legends that were on the original copy of the Software +2. Restrictions. The Software contains copyrighted and patented material, +trade secrets and other proprietary material. 
In order to protect them, and +except as permitted by applicable legislation, you may not: +a) decompile, reverse engineer, disassemble or otherwise reduce the Software +to a human-perceivable form; +b) modify, network, rent, lend, loan, distribute or create derivative works +based upon the Software in whole or in part; or +c) electronically transmit the Software from one computer to another or over +a network or otherwise transfer the Software except as permitted by this +License. +3. Termination. This License is effective until terminated. You may +terminate this License at any time by destroying the Software, related +documentation and all copies thereof. This License will terminate +immediately without notice from AMD if you fail to comply with any provision +of this License. Upon termination you must destroy the Software, related +documentation and all copies thereof. +4. Government End Users. If you are acquiring the Software on behalf of any +unit or agency of the United States Government, the following provisions +apply. The Government agrees the Software and documentation were developed +at private expense and are provided with "RESTRICTED RIGHTS". Use, +duplication, or disclosure by the Government is subject to restrictions as +set forth in DFARS 227.7202-1(a) and 227.7202-3(a) (1995), DFARS +252.227-7013(c)(1)(ii) (Oct 1988), FAR 12.212(a)(1995), FAR 52.227-19, (June +1987) or FAR 52.227-14(ALT III) (June 1987), as amended from time to time. +In the event that this License, or any part thereof, is deemed inconsistent +with the minimum rights identified in the Restricted Rights provisions, the +minimum rights shall prevail. +5. No Other License. No rights or licenses are granted by AMD under this +License, expressly or by implication, with respect to any proprietary +information or patent, copyright, trade secret or other intellectual property +right owned or controlled by AMD, except as expressly provided in this License. +6. Additional Licenses. 
DISTRIBUTION OR USE OF THE SOFTWARE WITH AN +OPERATING SYSTEM MAY REQUIRE ADDITIONAL LICENSES FROM THE OPERATING SYSTEM +VENDOR. Additional third party licenses may also be required and you agree +that you shall be solely responsible for obtaining such license rights. +7. Disclaimer of Warranty on Software. You expressly acknowledge and agree +that use of the Software is at your sole risk. The Software and related +documentation are provided "AS IS" and without warranty of any kind and AMD +EXPRESSLY DISCLAIMS ALL WARRANTIES, EXPRESS AND IMPLIED, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, ACCURACY, CONDITION, +OWNERSHIP, FITNESS FOR A PARTICULAR PURPOSE, AND/OR OF NON-INFRINGEMENT OF +THIRD PARTY INTELLECTUAL PROPERTY RIGHTS, AND THOSE ARISING FROM CUSTOM OR +TRADE OR COURSE OF USAGE. AMD DOES NOT WARRANT THAT THE FUNCTIONS CONTAINED +IN THE SOFTWARE WILL MEET YOUR REQUIREMENTS, OR THAT THE OPERATION OF THE +SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE, OR THAT DEFECTS IN THE SOFTWARE +WILL BE CORRECTED. THE ENTIRE RISK AS TO THE RESULTS AND PERFORMANCE OF THE +SOFTWARE IS ASSUMED BY YOU. FURTHERMORE, AMD DOES NOT WARRANT OR MAKE ANY +REPRESENTATIONS REGARDING THE USE OR THE RESULTS OF THE USE OF THE SOFTWARE +OR RELATED DOCUMENTATION IN TERMS OF THEIR CORRECTNESS, ACCURACY, +RELIABILITY, CURRENTNESS, OR OTHERWISE. NO ORAL OR WRITTEN INFORMATION OR +ADVICE GIVEN BY AMD OR AMD'S AUTHORIZED REPRESENTATIVE SHALL CREATE A +WARRANTY OR IN ANY WAY INCREASE THE SCOPE OF THIS WARRANTY. SHOULD THE +SOFTWARE PROVE DEFECTIVE, YOU (AND NOT AMD OR AMD'S AUTHORIZED +REPRESENTATIVE) ASSUME THE ENTIRE COST OF ALL NECESSARY SERVICING, REPAIR OR +CORRECTION. THE SOFTWARE IS NOT INTENDED FOR USE IN MEDICAL, LIFE SAVING OR +LIFE SUSTAINING APPLICATIONS. SOME JURISDICTIONS DO NOT ALLOW THE EXCLUSION +OF IMPLIED WARRANTIES, SO THE ABOVE EXCLUSION MAY NOT APPLY TO YOU. +8. Limitation of Liability. 
UNDER NO CIRCUMSTANCES INCLUDING NEGLIGENCE, +SHALL AMD, OR ITS DIRECTORS, OFFICERS, EMPLOYEES OR AGENTS ("AUTHORIZED +REPRESENTATIVES"), BE LIABLE TO YOU FOR ANY PUNITIVE, EXEMPLARY, DIRECT, +INCIDENTAL, INDIRECT, SPECIAL OR CONSEQUENTIAL DAMAGES (INCLUDING DAMAGES FOR +LOSS OF BUSINESS PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS +INFORMATION, AND THE LIKE) ARISING OUT OF THE USE, MISUSE OR INABILITY TO USE +THE SOFTWARE OR RELATED DOCUMENTATION, BREACH OR DEFAULT, INCLUDING THOSE +ARISING FROM INFRINGEMENT OR ALLEGED INFRINGEMENT OF ANY PATENT, TRADEMARK, +COPYRIGHT OR OTHER INTELLECTUAL PROPERTY RIGHT, BY AMD, EVEN IF AMD OR AMD'S +AUTHORIZED REPRESENTATIVE HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. SOME JURISDICTIONS DO NOT ALLOW THE LIMITATION OR EXCLUSION OF +LIABILITY FOR INCIDENTAL OR CONSEQUENTIAL DAMAGES, SO THE ABOVE LIMITATION OR +EXCLUSION MAY NOT APPLY TO YOU. AMD will not be liable for: 1) loss of, or +damage to, your records or data; or 2) any damages claimed by you based on +any third party claim. In no event shall AMD's total liability to you for +all damages, losses, and causes of action (whether in contract, tort +(including negligence) or otherwise) exceed the amount paid by you for the +Software. +9. Export Restrictions. You shall adhere to all U.S. and other applicable +export laws, including but not limited to the U.S. Export Administration +Regulations (EAR), currently found at 15 C.F.R. Sections 730 through 744. +Further, pursuant to 15 C.F.R Section 740.6, You hereby certifies that, +except pursuant to a license granted by the United States Department of +Commerce Bureau of Industry and Security or as otherwise permitted pursuant +to a License Exception under the U.S. 
Export Administration Regulations +("EAR"), You will not (1) export, re-export or release to a national of a +country in Country Groups D:1 or E:2 any restricted technology, software, or +source code it receives from AMD, or (2) export to Country Groups D:1 or E:2 +the direct product of such technology or software, if such foreign produced +direct product is subject to national security controls as identified on the +Commerce Control List (currently found in Supplement 1 to Part 774 of EAR).� +For the most current Country Group listings, or for additional information +about the EAR or Recipient's obligations under those regulations, please +refer to the U.S. Bureau of Industry and Security's website at +http://www.bis.doc.gov/.� These export requirements shall survive any +expiration or termination of this Agreement. +10. Controlling Law and Severability. This Agreement will be governed by and +construed under the laws of the State of California without reference to its +conflicts of law principles. The rights and obligations under this Agreement +shall not be governed by the United Nations Convention on Contracts or the +International Sale of Goods, the application of which is expressly excluded. +Each party hereto submits to the jurisdiction of the state and federal courts +of Santa Clara County and the Northern District of California for the purpose +of all legal proceedings arising out of or relating to this Agreement or the +subject matter hereof. Each party waives any objection which it may have to +contest such forum. +11. Complete Agreement. This License constitutes the entire agreement +between the parties with respect to the use of the Software and the related +documentation, and supersedes all prior or contemporaneous understandings or +agreements, written or oral, regarding such subject matter. No amendment to +or modification of this License will be binding unless in writing and signed +by a duly authorized representative of AMD. 
\ No newline at end of file diff --git a/RTCP/GPUProc/clAmdFft/clAmdFft-README.txt b/RTCP/GPUProc/clAmdFft/clAmdFft-README.txt new file mode 100644 index 0000000000000000000000000000000000000000..87b218b6dc872afeb3bded9e58f473d868bd2892 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/clAmdFft-README.txt @@ -0,0 +1,193 @@ +clAmdFft Readme + +Version: 1.8 +Release Date: September 2012 + +ChangeLog: + +____________ +Current version: +Fixed: + * Failures in real transforms seen on 7xxx series GPUs with certain + problem sizes involving powers of 3 and 5 + +Known Issues: + * Library may return invalid results on CPU devices. + +____________ +Version 1.8.276 (beta): +Fixed: + * Memory leaks affecting use cases where 'clAmdFftEnqueueTransform' is used in a loop + +____________ +Version 1.8.269 (beta): +New: + * clAmdFft now supports real-to-complex and complex-to-real transforms; + refer to documentation for details + * This release tested using the 12.4 Catalyst software suite + +Known Issues: + * Some degradation in performance of real transforms due to known + runtime/driver issues + * Failures in real transforms have been seen on 7xxx series GPUs with certain + problem sizes involving powers of 3 and 5 + +____________ +Version 1.6.244: +Fixed: + * Failures observed in v1.6.236 in backward transforms of certain power of 2 + (involving radix 4 and radix 8) problem sizes. 
+ +____________ +Version 1.6.236: +New: + * Performance of the FFT library has been improved for Radix-2 1D and 2D transforms + * Support for R4XXX GPUs is deprecated and no longer tested + * Preview: Support for AMD Radeon(TM) HD7000 series GPUs + * This release tested using the 8.92 runtime driver and the 2.6 APP SDK +____________ +Version 1.4: +New: + * clAmdFft now supports transform lengths whose factors consist exclusively + of powers of 2, 3, and 5 + * clAmdFft supports double precision data types + * clAmdFft executes on OpenCL 1.0 compliant devices + * This release tested using the 8.872 runtime driver and the 2.5 APP SDK + * A helper bash script appmlEnv.sh has been added to the root installation + directory to assist in properly setting up a terminal environment to + execute clAmdFft samples + +Fixed: + * If the library is required to allocate a temporary buffer, and the user does + not specify a temporary buffer on the Enqueue call, the library will + allocate a temporary buffer internally and the lifetime of that temporary + buffer is managed by the lifetime of the FFT plan; deleting the plan will + release the buffer. + * Test failures on CPU device for 32-bit systems (Windows/Linux) + +Known Issues: + * Failures have been seen on graphics cards using R4550 (RV710) GPUs. + +____________ +Version 1.2: +New: + * Reduced the number of internal LDS bank conflicts for our 1D FFT transforms, + increasing performance. + * Padded reads/writes to global memory, decreasing bank conflicts and + increasing performance on 2D transforms. + * This release tested using the 8.841 runtime driver and the 2.4 APP SDK + +Fixed: + * Failures have been seen attempting to queue work on the second GPU device on + a multi GPU 5970 card on Linux. + +Known Issues: + * It is recommended that users query for and explicitly create an + intermediate buffer if clAmdFft requires one. 
If the library creates the + intermediate buffer internally, a race condition may occur on freeing the + buffer on lower end hardware. + * Failures have been seen on graphics cards using R4550 (RV710) GPUs. + * Test failures on CPU device for 32-bit systems (Windows/Linux) + * It is recommended that windows users uninstall previous version of clAmdFft + before installing newer versions. Otherwise, Add/Remove programs only + removes the latest version. Linux users can delete the install directory. + +____________ +Version 1.0: + * Initial release, available on all platforms + +Known Issues: + * Failures have been seen attempting to queue work on the second GPU device on + a multi GPU 5970 card on Linux. +_____________________ +Building the Samples: + +To install the Linux versions of clAmdFft, uncompress the initial download and + then execute the install script. + +For example: + tar -xf clAmdFft-${version}.tar.gz + - This installs three files into the local directory, one being an + executable bash script. + + sudo mkdir /opt/clAmdFft-${version} + - This pre-creates the install directory with proper permissions in /opt + if it is to be installed there (This is the default). + + ./install-clAmdFft-${version}.sh + - This prints an EULA and uncompresses files into the chosen install + directory. + + cd ${installDir}/bin64 + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${OpenCLLibDir}:${clAmdFftLibDir} + - Export library dependencies to resolve all external linkages to the + client program. The user can create a bash script to help automate this + procedure. + + ./clAmdFft.Client -h + - Understand the command line options that are available to the user + through the sample client. + + ./clAmdFft.Client -iv + - Watch for the version strings to print out; watch for + 'Client Test *****PASS*****' to print out. + +The sample program does not ship with native build files. Instead, a CMake +file is shipped, and users generate a native build file for their system. 
+ +For example: + cd ${installDir} + mkdir samplesBin/ + - This creates a sister directory to the samples directory that will house + the native makefiles and the generated files from the build. + + cd samplesBin/ + ccmake ../samples/ + - ccmake is a curses-based cmake program. It takes a parameter that + specifies the location of the source code to compile. + - Hit 'c' to configure for the platform; ensure that the dependencies to + external libraries are satisfied, including paths to 'ATI Stream SDK' + and 'Boost'. + - After dependencies are satisfied, hit 'c' again to finalize configure + step, then hit 'g' to generate makefile and exit ccmake. + + make help + - Look at the available options for make. + + make + - Build the sample client program. + + ./clAmdFft.Sample -iv + - Watch for the version strings to print out; watch for + 'Client Test *****PASS*****' to print out. +_______________________________________________________________________________ +(C) 2010,2011 Advanced Micro Devices, Inc. All rights reserved. AMD, the AMD +Arrow logo, ATI, the ATI logo, Radeon, FireStream, FireGL, Catalyst, and +combinations thereof are trademarks of Advanced Micro Devices, Inc. Microsoft +(R), Windows, and Windows Vista (R) are registered trademarks of Microsoft +Corporation in the U.S. and/or other jurisdictions. OpenCL and the OpenCL logo +are trademarks of Apple Inc. used by permission by Khronos. Other names are for +informational purposes only and may be trademarks of their respective owners. + +The contents of this document are provided in connection with Advanced Micro +Devices, Inc. ("AMD") products. AMD makes no representations or warranties with +respect to the accuracy or completeness of the contents of this publication and +reserves the right to make changes to specifications and product descriptions +at any time without notice. The information contained herein may be of a +preliminary or advance nature and is subject to change without notice. 
No +license, whether express, implied, arising by estoppel or otherwise, to any +intellectual property rights is granted by this publication. Except as set forth +in AMD's Standard Terms and Conditions of Sale, AMD assumes no liability +whatsoever, and disclaims any express or implied warranty, relating to its +products including, but not limited to, the implied warranty of +merchantability, fitness for a particular purpose, or infringement of any +intellectual property right. + +AMD's products are not designed, intended, authorized or warranted for use as +components in systems intended for surgical implant into the body, or in other +applications intended to support or sustain life, or in any other application +in which the failure of AMD's product could create a situation where personal +injury, death, or severe property or environmental damage may occur. AMD +reserves the right to discontinue or make changes to its products at any time +without notice. +_______________________________________________________________________________ diff --git a/RTCP/GPUProc/clAmdFft/clAmdFft1.8.291.tar.gz b/RTCP/GPUProc/clAmdFft/clAmdFft1.8.291.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5597c390b4227b9b394459bb6a4a53c1ef7181a Binary files /dev/null and b/RTCP/GPUProc/clAmdFft/clAmdFft1.8.291.tar.gz differ diff --git a/RTCP/GPUProc/clAmdFft/doc/clAmdFft.refman.pdf b/RTCP/GPUProc/clAmdFft/doc/clAmdFft.refman.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d36e16e22290cbece432bc0ffbce8293f6760205 Binary files /dev/null and b/RTCP/GPUProc/clAmdFft/doc/clAmdFft.refman.pdf differ diff --git a/RTCP/GPUProc/clAmdFft/include/clAmdFft.h b/RTCP/GPUProc/clAmdFft/include/clAmdFft.h new file mode 100644 index 0000000000000000000000000000000000000000..73e65d0d349eeb24d7d27997d78abe43661a244a --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/include/clAmdFft.h @@ -0,0 +1,573 @@ 
+/*********************************************************************** +** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. +***********************************************************************/ + +/*! @file clAmdFft.h + * clAmdFft.h defines all of the public interfaces and types that are meant to be used by clFFT clients + * This is the one public header file that should be consumed by clFFT clients. It is written to adhere to native "C" + * interfaces to make clAmdFft library as portable as possible; it should be callable from C, C++, .NET and Fortran, + * either with the proper linking or using wrapper classes. + * + */ + +#pragma once +#if !defined( CLAMDFFT_DOTH ) +#define CLAMDFFT_DOTH + +#if defined(__APPLE__) || defined(__MACOSX) + #include <OpenCL/cl.h> +#else + #include <CL/cl.h> +#endif + +#include "clAmdFft.version.h" + +/*! This preprocessor definition is the standard way of making exporting APIs + * from a DLL simpler. All files within this DLL are compiled with the CLAMDFFT_EXPORTS + * symbol defined on the command line. This symbol should not be defined on any project + * that uses this DLL. This way any other project whose source files include this file see + * clAmdFft functions as being imported from a DLL, whereas this DLL sees symbols + * defined with this macro as being exported. + */ +#if defined( _WIN32 ) + #if !defined( __cplusplus ) + #define inline __inline + #endif + + #if defined( CLAMDFFT_EXPORTS ) + #define CLAMDFFTAPI __declspec( dllexport ) + #else + #define CLAMDFFTAPI __declspec( dllimport ) + #endif +#else + #define CLAMDFFTAPI +#endif + +/* In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names + * with the string clAmdFft + */ + +/* All functions will return pre-defined error codes, and will NOT throw exceptions to the caller + */ + +/*! 
@brief clAmdFft error codes definition, incorporating OpenCL error definitions + * + * This enumeration is a superset of the OpenCL error codes. For example, CL_OUT_OF_HOST_MEMORY, + * which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY. The set of basic OpenCL + * error codes is extended to add extra values specific to the clAmdFft package. + */ +enum clAmdFftStatus_ +{ + CLFFT_INVALID_GLOBAL_WORK_SIZE = CL_INVALID_GLOBAL_WORK_SIZE, + CLFFT_INVALID_MIP_LEVEL = CL_INVALID_MIP_LEVEL, + CLFFT_INVALID_BUFFER_SIZE = CL_INVALID_BUFFER_SIZE, + CLFFT_INVALID_GL_OBJECT = CL_INVALID_GL_OBJECT, + CLFFT_INVALID_OPERATION = CL_INVALID_OPERATION, + CLFFT_INVALID_EVENT = CL_INVALID_EVENT, + CLFFT_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, + CLFFT_INVALID_GLOBAL_OFFSET = CL_INVALID_GLOBAL_OFFSET, + CLFFT_INVALID_WORK_ITEM_SIZE = CL_INVALID_WORK_ITEM_SIZE, + CLFFT_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, + CLFFT_INVALID_WORK_DIMENSION = CL_INVALID_WORK_DIMENSION, + CLFFT_INVALID_KERNEL_ARGS = CL_INVALID_KERNEL_ARGS, + CLFFT_INVALID_ARG_SIZE = CL_INVALID_ARG_SIZE, + CLFFT_INVALID_ARG_VALUE = CL_INVALID_ARG_VALUE, + CLFFT_INVALID_ARG_INDEX = CL_INVALID_ARG_INDEX, + CLFFT_INVALID_KERNEL = CL_INVALID_KERNEL, + CLFFT_INVALID_KERNEL_DEFINITION = CL_INVALID_KERNEL_DEFINITION, + CLFFT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, + CLFFT_INVALID_PROGRAM_EXECUTABLE = CL_INVALID_PROGRAM_EXECUTABLE, + CLFFT_INVALID_PROGRAM = CL_INVALID_PROGRAM, + CLFFT_INVALID_BUILD_OPTIONS = CL_INVALID_BUILD_OPTIONS, + CLFFT_INVALID_BINARY = CL_INVALID_BINARY, + CLFFT_INVALID_SAMPLER = CL_INVALID_SAMPLER, + CLFFT_INVALID_IMAGE_SIZE = CL_INVALID_IMAGE_SIZE, + CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + CLFFT_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, + CLFFT_INVALID_HOST_PTR = CL_INVALID_HOST_PTR, + CLFFT_INVALID_COMMAND_QUEUE = CL_INVALID_COMMAND_QUEUE, + CLFFT_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, + 
CLFFT_INVALID_CONTEXT = CL_INVALID_CONTEXT, + CLFFT_INVALID_DEVICE = CL_INVALID_DEVICE, + CLFFT_INVALID_PLATFORM = CL_INVALID_PLATFORM, + CLFFT_INVALID_DEVICE_TYPE = CL_INVALID_DEVICE_TYPE, + CLFFT_INVALID_VALUE = CL_INVALID_VALUE, + CLFFT_MAP_FAILURE = CL_MAP_FAILURE, + CLFFT_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, + CLFFT_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED, + CLFFT_IMAGE_FORMAT_MISMATCH = CL_IMAGE_FORMAT_MISMATCH, + CLFFT_MEM_COPY_OVERLAP = CL_MEM_COPY_OVERLAP, + CLFFT_PROFILING_INFO_NOT_AVAILABLE = CL_PROFILING_INFO_NOT_AVAILABLE, + CLFFT_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, + CLFFT_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, + CLFFT_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE, + CLFFT_COMPILER_NOT_AVAILABLE = CL_COMPILER_NOT_AVAILABLE, + CLFFT_DEVICE_NOT_AVAILABLE = CL_DEVICE_NOT_AVAILABLE, + CLFFT_DEVICE_NOT_FOUND = CL_DEVICE_NOT_FOUND, + CLFFT_SUCCESS = CL_SUCCESS, + //-------------------------- Extended status codes for clAmdFft ---------------------------------------- + CLFFT_BUGCHECK = 4*1024, /*!< Bugcheck. */ + CLFFT_NOTIMPLEMENTED, /*!< Functionality is not implemented yet. */ + CLFFT_TRANSPOSED_NOTIMPLEMENTED, /*!< Transposed functionality is not implemented for this transformation. */ + CLFFT_FILE_NOT_FOUND, /*!< Tried to open an existing file on the host system, but failed. */ + CLFFT_FILE_CREATE_FAILURE, /*!< Tried to create a file on the host system, but failed. */ + CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ + CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ + CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ + CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clAmdFftStatus. */ +}; +typedef enum clAmdFftStatus_ clAmdFftStatus; + +/*! 
@brief The dimension of the input and output buffers that will be fed into all FFT transforms */ +typedef enum clAmdFftDim_ +{ + CLFFT_1D = 1, /*!< 1 Dimensional FFT transform (default). */ + CLFFT_2D, /*!< 2 Dimensional FFT transform. */ + CLFFT_3D, /*!< 3 Dimensional FFT transform. */ + ENDDIMENSION /*!< This value will always be last, and marks the length of clAmdFftDim. */ +} clAmdFftDim; + +/*! @brief What are the expected layouts of the complex numbers <p> + * <b> For Release 1.0,</b> only the CLFFT_COMPLEX_INTERLEAVED and CLFFT_COMPLEX_PLANAR formats are supported. + * The real and hermitian formats should be supported in a future release. + */ +typedef enum clAmdFftLayout_ +{ + CLFFT_COMPLEX_INTERLEAVED = 1, /*!< An array of complex numbers, with real and imaginary components together (default). */ + CLFFT_COMPLEX_PLANAR, /*!< Arrays of real components and arrays of imaginary components that have been separated out. */ + CLFFT_HERMITIAN_INTERLEAVED, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in same array. TODO: Document layout */ + CLFFT_HERMITIAN_PLANAR, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in separate arrays. TODO: Document layout */ + CLFFT_REAL, /*!< An array of real numbers, with no corresponding imaginary components. */ + ENDLAYOUT /*!< This value will always be last, and marks the length of clAmdFftLayout. */ +} clAmdFftLayout; + +/*! @brief What is the expected precision of each FFT. + * @ref DistanceStridesandPitches + */ +typedef enum clAmdFftPrecision_ +{ + CLFFT_SINGLE = 1, /*!< An array of complex numbers, with real and imaginary components as floats (default). */ + CLFFT_DOUBLE, /*!< An array of complex numbers, with real and imaginary components as doubles. */ + CLFFT_SINGLE_FAST, /*!< Faster implementation preferred. */ + CLFFT_DOUBLE_FAST, /*!< Faster implementation preferred. 
*/ + ENDPRECISION /*!< This value will always be last, and marks the length of clAmdFftPrecision. */ +} clAmdFftPrecision; + +/*! @brief What is the expected direction of each FFT, time or the frequency domains */ +typedef enum clAmdFftDirection_ +{ + CLFFT_FORWARD = -1, /*!< FFT transform from the time to the frequency domain. */ + CLFFT_BACKWARD = 1, /*!< FFT transform from the frequency to the time domain. */ + CLFFT_MINUS = -1, /*!< Alias for the forward transform. */ + CLFFT_PLUS = 1, /*!< Alias for the backward transform. */ + ENDDIRECTION /*!< This value will always be last, and marks the length of clAmdFftDirection. */ +} clAmdFftDirection; + +/*! @brief Are the input buffers overwritten with the results */ +typedef enum clAmdFftResultLocation_ +{ + CLFFT_INPLACE = 1, /*!< The input and output buffers are the same (default). */ + CLFFT_OUTOFPLACE, /*!< Separate input and output buffers. */ + ENDPLACE /*!< This value will always be last, and marks the length of clAmdFftResultLocation. */ +} clAmdFftResultLocation; + +/*! @brief Whether the result will be returned in original order; only valid for dimensions greater than 1 */ +typedef enum clAmdFftResultTransposed_ { + CLFFT_NOTRANSPOSE = 1, /*!< The results are returned in the original preserved order (default) */ + CLFFT_TRANSPOSED, /*!< The result is transposed where transpose kernel is supported (possibly faster) */ + ENDTRANSPOSED /*!< This value will always be last, and marks the length of clAmdFftResultTransposed */ +} clAmdFftResultTransposed; + +/*! BitMasks to be used with clAmdFftSetupData.debugFlags */ +#define CLFFT_DUMP_PROGRAMS 0x1 + +/*! @brief Data structure that can be passed to clAmdFftSetup() to control the behavior of the FFT runtime + * @details This structure contains values that can be initialized before instantiation of the FFT runtime + * with ::clAmdFftSetup(). 
To initialize this structure, pass a pointer to a user struct to ::clAmdFftInitSetupData( ), + * which will clear the structure and set the version member variables to the current values. + */ +struct clAmdFftSetupData_ +{ + cl_uint major; /*!< Major version number of the project; signifies major API changes. */ + cl_uint minor; /*!< Minor version number of the project; minor API changes that could break backwards compatibility. */ + cl_uint patch; /*!< Patch version number of the project; Always incrementing number, signifies change over time. */ + + /*! Bitwise flags that control the behavior of library debug logic. */ + cl_ulong debugFlags; /*! This should be set to zero, except when debugging the clAmdFft library. + * <p> debugFlags can be set to CLFFT_DUMP_PROGRAMS, in which case the dynamically generated OpenCL kernels will + * be written to text files in the current working directory. These files will have a *.cl suffix. + */ +}; +typedef struct clAmdFftSetupData_ clAmdFftSetupData; + +/*! @brief An abstract handle to the object that represents the state of the FFT(s) */ +typedef size_t clAmdFftPlanHandle; + +#ifdef __cplusplus +extern "C" { +#endif + /*! @brief Initialize a clAmdFftSetupData struct for the client + * @details clAmdFftSetupData is passed to clAmdFftSetup to control behavior of the FFT runtime + * @param[out] setupData Data structure is cleared, initialized with version information and default values; + * must not be NULL, because the pointer is dereferenced without a check + * @return Enum describing error condition; superset of OpenCL error codes (this inline implementation always returns CLFFT_SUCCESS) + */ + inline clAmdFftStatus clAmdFftInitSetupData( clAmdFftSetupData* setupData ) + { + setupData->major = clAmdFftVersionMajor; + setupData->minor = clAmdFftVersionMinor; + setupData->patch = clAmdFftVersionPatch; + setupData->debugFlags = 0; + + return CLFFT_SUCCESS; + } + + /*! @brief Initialize internal FFT resources. + * @details AMD's FFT implementation caches kernels, programs and buffers for its internal use. 
+ * @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior + * and debug functionality + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetup( const clAmdFftSetupData* setupData ); + + /*! @brief Release all internal resources. + * @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftTeardown( ); + + /*! @brief Query the FFT library for version information + * @details Return the major, minor and patch version numbers associated with this FFT library + * @param[out] major Major functionality change + * @param[out] minor Minor functionality change + * @param[out] patch Bug fixes, documentation changes, no new features introduced + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch ); + + /*! @brief Create a plan object initialized entirely with default values. + * @details A plan is a repository of state for calculating FFT's. Allows the runtime to pre-calculate kernels, programs + * and buffers and associate them with buffers of specified dimensions. + * @param[out] plHandle Handle to the newly created plan + * @param[in] context Client is responsible for providing an OpenCL context for the plan + * @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array + * @param[in] clLengths An array of lengths, of size 'dim'. 
Each value describes the length of additional dimensions + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftCreateDefaultPlan( clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, + const size_t* clLengths ); + + /*! @brief Create a copy of an existing plan. + * @details This API allows a client to create a new plan based upon an existing plan. This is a convenience function + * provided for quickly creating plans that are similar, but may differ slightly. + * @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle + * @param[in] new_context Client is responsible for providing a new context for the new plan + * @param[in] in_plHandle Handle to a plan to be copied, previously created + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftCopyPlan( clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle ); + + /*! @brief Prepare the plan for execution. + * @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that + * no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled. This optional function + * allows the client application to perform this function when the application is being initialized instead of on the first + * execution. + * At this point, the clAmdFft runtime will apply all implemented optimizations, possibly including + * running kernel experiments on the devices in the plan context. + * <p> Users should assume that this function will take a long time to execute. If a plan is not baked before being executed, + * users should assume that the first call to clAmdFftEnqueueTransform will take a long time to execute. 
+ * <p> If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of + * the clAmdFftSetPlan____ functions), that will not be considered an error. Instead, the plan will revert back to + * the unbaked state, discarding the benefits of the baking operation. + * @param[in] plHandle Handle to a plan previously created + * @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want + * the runtime to run load experiments and only pre-calculate state information + * @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of + * the devices included in the plan context + * @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that + * an application can register and which will be called when the program executable has been built (successfully or unsuccessfully) + * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. + * @param[in] user_data Passed as an argument when pfn_notify is called. + * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftBakePlan( clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, + void (CL_CALLBACK *pfn_notify)(clAmdFftPlanHandle plHandle, void *user_data), void* user_data ); + + /*! @brief Release the resources of a plan. + * @details A plan may include kernels, programs and buffers associated with it that consume memory. When a plan + * is not needed anymore, the client should release the plan. + * @param[in,out] plHandle Handle to a plan previously created + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftDestroyPlan( clAmdFftPlanHandle* plHandle ); + + /*! 
@brief Retrieve the OpenCL context of a previously created plan. + * @details User should pass a reference to an cl_context variable, which will be changed to point to a + * context set in the specified plan. + * @param[in] plHandle Handle to a plan previously created + * @param[out] context Reference to user allocated cl_context, which will point to context set in plan + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanContext( const clAmdFftPlanHandle plHandle, cl_context* context ); + + /*! @brief Retrieve the floating point precision of the FFT data + * @details User should pass a reference to a clAmdFftPrecision variable, which will be set to the + * precision of the FFT complex data in the plan. + * @param[in] plHandle Handle to a plan previously created + * @param[out] precision Reference to user clAmdFftPrecision enum + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanPrecision( const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision ); + + /*! @brief Set the floating point precision of the FFT data + * @details Set the plan property which will be the precision of the FFT complex data in the plan. + * @param[in] plHandle Handle to a plan previously created + * @param[in] precision The clAmdFftPrecision enum value to set (passed by value) <p> + * <b> For Release 1.0,</b> only CLFFT_SINGLE and CLFFT_SINGLE_FAST are supported. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanPrecision( clAmdFftPlanHandle plHandle, clAmdFftPrecision precision ); + + /*! @brief Retrieve the scaling factor that should be applied to the FFT data + * @details User should pass a reference to a cl_float variable, which will be set to the + * floating point scaling factor that will be multiplied across the FFT data. 
+ * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Which direction does the scaling factor apply to + * @param[out] scale Reference to user cl_float variable + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanScale( const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale ); + + /*! @brief Set the scaling factor that should be applied to the FFT data + * @details Set the plan property which will be the floating point scaling factor that will be + * multiplied across the FFT data. + * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Which direction does the scaling factor apply to + * @param[in] scale The floating point scaling factor to set (passed by value) + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanScale( clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale ); + + /*! @brief Retrieve the number of discrete arrays that this plan can handle concurrently + * @details User should pass a reference to a size_t variable, which will be set to the + * number of discrete arrays (1D or 2D) that will be batched together for this plan + * @param[in] plHandle Handle to a plan previously created + * @param[out] batchSize How many discrete number of FFT's are to be performed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanBatchSize( const clAmdFftPlanHandle plHandle, size_t* batchSize ); + + /*! 
@brief Set the number of discrete arrays that this plan can handle concurrently + * @details Set the plan property which will be set to the number of discrete arrays (1D or 2D) + * that will be batched together for this plan + * @param[in] plHandle Handle to a plan previously created + * @param[in] batchSize How many discrete number of FFT's are to be performed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanBatchSize( clAmdFftPlanHandle plHandle, size_t batchSize ); + + /*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan + * @details Queries a plan object and retrieves the dimensionality that the plan is set for. A size is returned to + * help the client allocate the proper storage to hold the dimensions in a further call to clAmdFftGetPlanLength + * @param[in] plHandle Handle to a plan previously created + * @param[out] dim The dimensionality of the FFT's to be transformed + * @param[out] size Value used to allocate an array to hold the FFT dimensions. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDim( const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size ); + + /*! @brief Set the dimensionality of FFT's to be transformed by the plan + * @details Set the dimensionality of FFT's to be transformed by the plan + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimensionality of the FFT's to be transformed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDim( clAmdFftPlanHandle plHandle, const clAmdFftDim dim ); + + /*! 
@brief Retrieve the length of each dimension of the FFT + * @details User should pass a reference to a size_t array, which will be set to the + * length of each discrete dimension of the FFT + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the length parameters; describes how many elements are in the array + * @param[out] clLengths An array of lengths, of size 'dim'. Each array value describes the length of each dimension + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanLength( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths ); + + /*! @brief Set the length of each dimension of the FFT + * @details Set the plan property which will be the length of each discrete dimension of the FFT + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the length parameters; describes how many elements are in the array + * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions + * <p><b> For Release 1.0, </b> All lengths must be powers of 2. Non-power-of-two dimensions should be supported in a future release. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanLength( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths ); + + /*! @brief Retrieve the distance between consecutive elements for input buffers in a dimension. + * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely + * ignored + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[out] clStrides An array of strides, of size 'dim'. 
Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanInStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Set the distance between consecutive elements for input buffers in a dimension. + * @details Set the plan properties which will be the distance between elements in a given dimension + * (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[in] clStrides An array of strides, of size 'dim'. + * See @ref DistanceStridesandPitches for details. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanInStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension. + * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely + * ignored + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanOutStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Set the distance between consecutive elements for output buffers in a dimension. 
+ * @details Set the plan properties which will be the distance between elements in a given dimension + * (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + * @sa clAmdFftSetPlanInStride + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanOutStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Retrieve the distance between Array objects + * @details Pitch is the distance between each discrete array object in an FFT array. This is only used + * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. + * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) + * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output. + * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDistance( const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist ); + + /*! @brief Set the distance between Array objects + * @details Pitch is the distance between each discrete array object in an FFT array. 
This is only used + * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] iDist The distance between the beginning elements of the discrete array objects in memory on input. + * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) + * @param[in] oDist The distance between the beginning elements of the discrete array objects in memory on output. + * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDistance( clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist ); + + /*! @brief Retrieve the expected layout of the input and output buffers + * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored + * in various layouts; this informs the FFT engine what layout to produce on output + * @param[in] plHandle Handle to a plan previously created + * @param[out] iLayout Indicates how the input buffers are laid out in memory + * @param[out] oLayout Indicates how the output buffers are laid out in memory + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetLayout( const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout ); + + /*! @brief Set the expected layout of the input and output buffers + * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored + * in various layouts; this informs the FFT engine what layout to produce on output + * @param[in] plHandle Handle to a plan previously created + * @param[in] iLayout Indicates how the input buffers are laid out in memory + * @param[in] oLayout Indicates how the output buffers are laid out in memory + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetLayout( clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout ); + + /*! 
@brief Retrieve whether the input buffers are going to be overwritten with results + * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the + * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers + * on the Enqueue call. + * @param[in] plHandle Handle to a plan previously created + * @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetResultLocation( const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness ); + + /*! @brief Set whether the input buffers are going to be overwritten with results + * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the + * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers + * on the Enqueue call. + * @param[in] plHandle Handle to a plan previously created + * @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetResultLocation( clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness ); + + /*! @brief Retrieve the final transpose setting of a multi-dimensional FFT + * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client + * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped + * for possible speed improvements + * @param[in] plHandle Handle to a plan previously created + * @param[out] transposed Parameter specifies whether the final transpose can be skipped + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanTransposeResult( const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed * transposed ); + + /*! 
@brief Set the final transpose setting of a multi-dimensional FFT + * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client + * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped + * for possible speed improvements + * @param[in] plHandle Handle to a plan previously created + * @param[in] transposed Parameter specifies whether the final transpose can be skipped + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanTransposeResult( clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed ); + + + /*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer + * @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold + * intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error + * is returned. If buffersize returns as 0, the runtime needs no temporary buffer. + * @param[in] plHandle Handle to a plan previously created + * @param[out] buffersize Size in bytes for intermediate buffer + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetTmpBufSize( const clAmdFftPlanHandle plHandle, size_t* buffersize ); + + /*! 
@brief Enqueue an FFT transform operation, and return immediately (non-blocking) + * @details This transform API is specific to the interleaved complex format, taking an input buffer with real and imaginary + * components paired together, and outputting the results into an output buffer in the same format + * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Forwards or backwards transform + * @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents + * @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of + * the devices included in the plan context + * @param[in] numWaitEvents Specify the number of elements in the waitEvents array + * @param[in] waitEvents Events that this transform should wait to complete before executing on the device + * @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed + * in commQueues. This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications + * when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least + * as many elements as specified in numQueuesAndEvents. + * @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime. If the transform + * is in place, the FFT results will overwrite the input buffers + * @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms. If the transform + * is in place, this parameter may be NULL or nullptr. It is completely ignored + * @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. 
If tmpBuffer is NULL or nullptr, + * and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftEnqueueTransform( + clAmdFftPlanHandle plHandle, + clAmdFftDirection dir, + cl_uint numQueuesAndEvents, + cl_command_queue* commQueues, + cl_uint numWaitEvents, + const cl_event* waitEvents, + cl_event* outEvents, + cl_mem* inputBuffers, + cl_mem* outputBuffers, + cl_mem tmpBuffer + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/RTCP/GPUProc/clAmdFft/include/clAmdFft.version.h b/RTCP/GPUProc/clAmdFft/include/clAmdFft.version.h new file mode 100644 index 0000000000000000000000000000000000000000..4c3e04a0ea3baa5ad95af902099cec43b4385b34 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/include/clAmdFft.version.h @@ -0,0 +1,9 @@ +/*********************************************************************** +** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. 
+***********************************************************************/ + +/* the configured version and settings for clAmdFft + */ +#define clAmdFftVersionMajor 1 +#define clAmdFftVersionMinor 8 +#define clAmdFftVersionPatch 291 diff --git a/RTCP/GPUProc/clAmdFft/install-clAmdFft-1.8.291.sh b/RTCP/GPUProc/clAmdFft/install-clAmdFft-1.8.291.sh new file mode 100755 index 0000000000000000000000000000000000000000..5f251c66390923e0423d607a11f4e1040ead20f8 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/install-clAmdFft-1.8.291.sh @@ -0,0 +1,123 @@ +#!/bin/bash +# +# Installation script + +# Grab the name of the bash script, which must have a certain format: install-${AppName}-${AppVersion}.sh +ScriptName=(${0//-/ }) +AppName="${ScriptName[1]}" + +# Parse this version number from the name of this script +AppVersionArray=(${ScriptName[2]//./ }) +AppVersion="${AppVersionArray[0]}.${AppVersionArray[1]}.${AppVersionArray[2]}" + +# Check for any switch arguments +doneswitches=0 +badswitch=0 +acceptlicense=0 +installdir="" +while test "${doneswitches}" = "0" +do + case "${1-}" in + -accept*) + acceptlicense=1 + shift;; + -installdir=*) + installdir=`echo ${1} | sed -e 's%.*=%%'` + shift;; + -*) + echo "${ScriptName}: unrecognised switch: ${1}" + badswitch=1 + exit + shift;; + *) + doneswitches=1;; + esac +done + +showLicense() +{ +if [ "${acceptlicense}" = "1" ]; then + echo "Warning: by installing this software you have accepted" + echo "the license agreement in ${AppName}-EULA.txt" + reply="accept" +else + more ${AppName}-EULA.txt + + reply="" + while [ "${reply}" != "accept" -a "${reply}" != "decline" ]; do + echo -e "[accept/decline]? : \c" + read reply + reply=`echo ${reply} | tr [:upper:] [:lower:]` + done +fi +} + +get_yes_no() +{ +reply="" +while [ "$reply" != "y" -a "$reply" != "n" ]; do + echo -e "$1 ? 
[y/n] : \c" + read reply + reply=`echo ${reply} | tr [:upper:] [:lower:]` +done +} + +echo -e " ${AppName}-${AppVersion} Installation " +echo -e " ===================================== " +echo -e "" +echo -e "This script will install ${AppName} version ${AppVersion}" + +showLicense + +if [ "${reply}" != "accept" ]; then + echo "Installation declined. ${AppName}-${AppVersion} not installed." + exit +fi + +echo -e "" +echo -e "Where do you want to install ${AppName}-${AppVersion}? Press return to use" +echo -e "the default location (/opt/${AppName}-${AppVersion}), or enter an alternative path." +echo -e "The directory will be created if it does not already exist." +if [ "${installdir}" != "" ]; then + INSTALLDIR=${installdir} +else + INSTALLDIR="" + while [ "${INSTALLDIR}" = "" ]; do + echo -e "> \c" + read ans + if [ $ans ] + then + case $ans in + *) INSTALLDIR=$ans ;; + esac + else + INSTALLDIR=/opt/${AppName}-${AppVersion} + fi + done +fi + +# Replace any ~ by ${HOME} otherwise you end up with a +# subdirectory named ~ (dangerous if you then try to remove it!) +INSTALLDIR=`echo ${INSTALLDIR} | sed -e "s%~%${HOME}%g"` + +echo -e "" +echo -e "Installing to : ${INSTALLDIR}" +echo -e "" + +if [ ! -d "${INSTALLDIR}" ] +then + mkdir -p "${INSTALLDIR}" + if [ $? 
-ne 0 ] + then + echo -e "***** Cannot create installation directory, installation failed *****" + exit + fi +fi + +# Extract everything from the compressed tar file +fromdir=$( pwd ) +cd "${INSTALLDIR}" +tar -xvf "${fromdir}/${AppName}-${AppVersion}-Linux.tar.gz" + +echo -e "" +echo -e "====== ${AppName}-${AppVersion} installation complete ======" diff --git a/RTCP/GPUProc/clAmdFft/samples/CMakeLists.txt b/RTCP/GPUProc/clAmdFft/samples/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dcf1f3b8d95a9b31ab72cda1c542d365da35d8c0 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/CMakeLists.txt @@ -0,0 +1,170 @@ +############################################################################# +## Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. +############################################################################# +cmake_minimum_required( VERSION 2.6 ) +project( clAmdFft.Sample ) + +# If AMDAPPSDKROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. +# Otherwise, create a sensible default that the user can change +if( DEFINED ENV{AMDAPPSDKROOT} ) + set( AMD_APP_SDK_ROOT $ENV{AMDAPPSDKROOT} CACHE PATH "Environment variable defining the root of the ATI Stream SDK" ) +else( ) + set( AMD_APP_SDK_ROOT "/Path/To/ATI_Stream_SDK" CACHE PATH "Modify this variable to point to the root of the ATI Stream SDK installation" ) +endif( ) + +# If BOOST_ROOT is defined as an environment value, use that value and cache it so it's visible in the cmake-gui. 
+# Otherwise, create a sensible default that the user can change +if( DEFINED ENV{BOOST_ROOT} ) + set( BOOST_ROOT $ENV{BOOST_ROOT} CACHE PATH "Environment variable defining the root of the Boost installation" ) +else( ) + if( UNIX ) + set( BOOST_ROOT "/usr" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) + else( ) + set( BOOST_ROOT "/Path/To/boost_x_xx_x" CACHE PATH "Modify this variable to point to the root of the Boost installation" ) + endif() +endif( ) + +# Currently, linux has a problem outputing both narrow and wide characters, +# which happens in our client because openCL only supports narrow characters +if( WIN32 ) + option( UNICODE "Build with Unicode Support" ON ) + if( UNICODE ) + message( STATUS "UNICODE build" ) + endif( ) +else() + set( UNICODE OFF ) + message( STATUS "UNICODE feature disabled on linux" ) +endif() + +if( MSVC_IDE ) + set( BUILD64 ${CMAKE_CL_64} ) +else() + option( BUILD64 "Build a 64-bit product" ON ) + if( BUILD64 ) + message( STATUS "64-bit build" ) + endif( ) + + if( IS_DIRECTORY ${PROJECT_SOURCE_DIR}/library/test ) + option( CODE_COVERAGE "Build makefiles with code coverage instrumentation" OFF ) + if( CODE_COVERAGE ) + message( STATUS "Code coverage instrumentation on" ) + endif() + endif() +endif() + +# For linux, modify the global find property to help us find libraries like Boost in the correct paths +if( UNIX ) + if( BUILD64 ) + set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS TRUE ) + message( STATUS "64bit build - FIND_LIBRARY_USE_LIB64_PATHS: ${FIND_LIBRARY_USE_LIB64_PATHS}" ) + else() + set_property( GLOBAL PROPERTY FIND_LIBRARY_USE_LIB64_PATHS FALSE ) + message( STATUS "32bit build - FIND_LIBRARY_USE_LIB64_PATHS: ${FIND_LIBRARY_USE_LIB64_PATHS}" ) + endif() +endif() + +# Find the absolute path to the opencl library that we need to link too; the path depends on being 64bit or 32bit +if( BUILD64 ) + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${AMD_APP_SDK_ROOT}/lib/ 
+ ENV AMD_APP_SDK_ROOT + PATH_SUFFIXES x86_64 x86 + ) +else() + find_library( OPENCL_LIBRARIES + NAMES OpenCL + HINTS + ${AMD_APP_SDK_ROOT}/lib/ + ENV AMD_APP_SDK_ROOT + PATH_SUFFIXES x86 + ) +endif() +message( STATUS "OPENCL_LIBRARIES: ${OPENCL_LIBRARIES}" ) + +set( Boost_USE_MULTITHREADED ON ) +set( Boost_USE_STATIC_LIBS ON ) +set( Boost_DETAILED_FAILURE_MSG ON ) +set( Boost_DEBUG ON ) +set( Boost_ADDITIONAL_VERSIONS "1.44.0" "1.44" ) +# On linux, the boost installed in the system always appears to override any user boost installs +if( UNIX ) + set( Boost_NO_SYSTEM_PATHS TRUE ) +endif( ) +find_package( Boost 1.33.0 COMPONENTS program_options ) +message(STATUS "Boost_PROGRAM_OPTIONS_LIBRARY: ${Boost_PROGRAM_OPTIONS_LIBRARY}") + +# FFLAGS depend on the compiler, grab the compiler name from the path +get_filename_component( C_COMPILER_NAME ${CMAKE_C_COMPILER} NAME_WE ) +# message( "C_COMPILER_NAME: " ${C_COMPILER_NAME} ) +# message( "CMAKE_C_COMPILER: " ${CMAKE_C_COMPILER} ) + +# Set common compile and link options +if( C_COMPILER_NAME STREQUAL "cl" ) + # Following options for nMake + message( STATUS "Detected MSVS Ver: " ${MSVC_VERSION} ) + if( NOT MSVC_IDE ) + message( STATUS "Using an nMake environment to build" ) + + endif( ) + +elseif( C_COMPILER_NAME STREQUAL "gcc" ) + message( STATUS "Detected GNU fortran compiler." 
) + # set( CMAKE_CXX_FLAGS "-std=c++0x ${CMAKE_CXX_FLAGS}" ) + + if( BUILD64 ) + set( CMAKE_CXX_FLAGS "-m64 ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-m64 ${CMAKE_C_FLAGS}" ) + else( ) + set( CMAKE_CXX_FLAGS "-m32 ${CMAKE_CXX_FLAGS}" ) + set( CMAKE_C_FLAGS "-m32 ${CMAKE_C_FLAGS}" ) + endif( ) +else( ) + message( FATAL_ERROR "Compiler name not detected" ) +endif( ) + +# If UNICODE is defined, pass extra definitions into +if( UNICODE ) + add_definitions( "/DUNICODE /D_UNICODE" ) +endif( ) + +# Print out compiler flags for viewing/debug +message( STATUS "CMAKE_CXX_COMPILER flags: " ${CMAKE_CXX_FLAGS} ) +message( STATUS "CMAKE_CXX_COMPILER debug flags: " ${CMAKE_CXX_FLAGS_DEBUG} ) +message( STATUS "CMAKE_CXX_COMPILER release flags: " ${CMAKE_CXX_FLAGS_RELEASE} ) +message( STATUS "CMAKE_CXX_COMPILER relwithdebinfo flags: " ${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ) +message( STATUS "CMAKE_EXE_LINKER link flags: " ${CMAKE_EXE_LINKER_FLAGS} ) + +include_directories( ${Boost_INCLUDE_DIRS} ${AMD_APP_SDK_ROOT}/include ${PROJECT_SOURCE_DIR}/../include ) + +# Set the OpenCL library include path depending on target platform +if( BUILD64 ) + if( WIN32 ) + link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64/import ) + elseif( UNIX ) + link_directories( ${AMD_APP_SDK_ROOT}/lib/x86_64/ ${PROJECT_SOURCE_DIR}/../lib64 ) + endif() +else() + if( WIN32 ) + link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32/import ) + elseif( UNIX ) + link_directories( ${AMD_APP_SDK_ROOT}/lib/x86/ ${PROJECT_SOURCE_DIR}/../lib32 ) + endif() +endif() + +add_executable( clAmdFft.Client + # sources follow + clAmdFft.client.cpp + clAmdFft.openCL.cpp + statisticalTimer.cpp + stdafx.cpp + clAmdFft.client.h + clAmdFft.openCL.h + statisticalTimer.h + stdafx.h + targetver.h + amd-unicode.h + ../include/clAmdFft.h ) + +target_link_libraries(clAmdFft.Client clAmdFft.Runtime ${Boost_LIBRARIES} ${OPENCL_LIBRARIES}) diff --git 
a/RTCP/GPUProc/clAmdFft/samples/amd-unicode.h b/RTCP/GPUProc/clAmdFft/samples/amd-unicode.h new file mode 100644 index 0000000000000000000000000000000000000000..6aad7185c760729a32dd640560d739524fb5b145 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/amd-unicode.h @@ -0,0 +1,46 @@ +//////////////////////////////////////////// +// Copyright (C) 2011 Advanced Micro Devices, Inc. All Rights Reserved. +//////////////////////////////////////////// + +#pragma once +#if !defined( amd_unicode_h ) +#define amd_unicode_h + +// Typedefs to support unicode and ansii compilation +#if defined( _UNICODE ) + typedef std::wstring tstring; + typedef std::wstringstream tstringstream; + typedef std::wifstream tifstream; + typedef std::wofstream tofstream; + typedef std::wfstream tfstream; + static std::wostream& tout = std::wcout; + static std::wostream& terr = std::wcerr; +#else + typedef std::string tstring; + typedef std::stringstream tstringstream; + typedef std::ifstream tifstream; + typedef std::ofstream tofstream; + typedef std::fstream tfstream; + static std::ostream& tout = std::cout; + static std::ostream& terr = std::cerr; +#endif + +// These macros help linux cope with the conventions of windows tchar.h file +#if defined( _WIN32 ) + #include <tchar.h> + #include <windows.h> +#else + #if defined( __GNUC__ ) + typedef char TCHAR; + typedef char _TCHAR; + #define _tmain main + + #if defined( UNICODE ) + #define _T(x) L ## x + #else + #define _T(x) x + #endif + #endif +#endif + +#endif \ No newline at end of file diff --git a/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.cpp b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b5c6d5dea1cbf904282fe2b075329ba7b981e35c --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.cpp @@ -0,0 +1,635 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. 
+//////////////////////////////////////////// + +// clAmdFft.client.cpp : Defines the entry point for the console application. +// + +#include "stdafx.h" +#include "../include/clAmdFft.h" +#include "clAmdFft.client.h" +#include "clAmdFft.openCL.h" +#include "statisticalTimer.h" +#include "amd-unicode.h" + +namespace po = boost::program_options; + +// This is used with the program_options class so that the user can type an integer on the command line +// and we store into an enum varaible +template<class _Elem, class _Traits> +std::basic_istream<_Elem, _Traits> & operator>> (std::basic_istream<_Elem, _Traits> & stream, clAmdFftLayout & layout) +{ + cl_uint tmp; + stream >> tmp; + layout = clAmdFftLayout(tmp); + return stream; +} + +// Format an unsigned number with comma thousands separator +// +template<typename T> // T could be 32-bit or 64-bit +std::basic_string<TCHAR> commatize (T number) { + static TCHAR scratch [8*sizeof(T)]; + register TCHAR * ptr = scratch + countOf(scratch); + *(--ptr) = 0; + for (int digits = 3; ; ) { + *(--ptr) = '0' + int (number % 10); + number /= 10; + if (0 == number) + break; + if (--digits <= 0) { + *(--ptr) = ','; + digits = 3; + } + } + return std::basic_string<TCHAR> (ptr); +} // end of commatize () + + +int _tmain( int argc, _TCHAR* argv[] ) +{ + // This helps with mixing output of both wide and narrow characters to the screen + std::ios::sync_with_stdio( false ); + + // Define MEMORYREPORT on windows platfroms to enable debug memory heap checking +#if defined( MEMORYREPORT ) && defined( _WIN32 ) + TCHAR logPath[ MAX_PATH ]; + ::GetCurrentDirectory( MAX_PATH, logPath ); + ::_tcscat_s( logPath, _T( "\\MemoryReport.txt") ); + + // We leak the handle to this file, on purpose, so that the ::_CrtSetReportFile() can output it's memory + // statistics on app shutdown + HANDLE hLogFile; + hLogFile = ::CreateFile( logPath, GENERIC_WRITE, + FILE_SHARE_READ|FILE_SHARE_WRITE, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL ); + + 
::_CrtSetReportMode( _CRT_ASSERT, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); + ::_CrtSetReportMode( _CRT_ERROR, _CRTDBG_MODE_FILE | _CRTDBG_MODE_WNDW | _CRTDBG_MODE_DEBUG ); + ::_CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG ); + + ::_CrtSetReportFile( _CRT_ASSERT, hLogFile ); + ::_CrtSetReportFile( _CRT_ERROR, hLogFile ); + ::_CrtSetReportFile( _CRT_WARN, hLogFile ); + + int tmp = ::_CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); + tmp |= _CRTDBG_LEAK_CHECK_DF | _CRTDBG_ALLOC_MEM_DF | _CRTDBG_CHECK_ALWAYS_DF; + ::_CrtSetDbgFlag( tmp ); + + // By looking at the memory leak report that is generated by this debug heap, there is a number with + // {} brackets that indicates the incremental allocation number of that block. If you wish to set + // a breakpoint on that allocation number, put it in the _CrtSetBreakAlloc() call below, and the heap + // will issue a bp on the request, allowing you to look at the call stack + // ::_CrtSetBreakAlloc( 1833 ); + +#endif /* MEMORYREPORT */ + + // OpenCL state + cl_context context; + cl_command_queue queue; + cl_mem clMemBuffersIn [ 2 ] = { NULL, NULL }; + cl_mem clMemBuffersOut[ 2 ] = { NULL, NULL }; + std::vector< cl_device_id > device_id; + cl_event outEvent = NULL; + cl_device_type deviceType = CL_DEVICE_TYPE_DEFAULT; + cl_uint deviceGpuList = 0; // a bitmap set + + // FFT state + clAmdFftPlanHandle plHandle; + clAmdFftResultLocation place = CLFFT_INPLACE; + clAmdFftLayout inLayout = CLFFT_COMPLEX_INTERLEAVED; + clAmdFftLayout outLayout = CLFFT_COMPLEX_INTERLEAVED; + size_t clLengths[ 3 ]; + size_t clPadding[ 3 ] = {0, 0, 0, }; // *** TODO + size_t clStrides[ 4 ]; + cl_uint commandQueueFlags = 0; + size_t batchSize = 1; + + // Local Data + size_t buffSizeBytesIn = 0, buffSizeBytesOut = 0; + size_t fftVectorSize= 0, fftVectorSizePadded = 0, fftBatchSize = 0; + cl_uint nBuffersOut = 0; + cl_uint profileCount = 0; + clAmdFftDim dim = CLFFT_1D; + + // Initialize flags for FFT library + 
std::auto_ptr< clAmdFftSetupData > setupData( new clAmdFftSetupData ); + OPENCL_V_THROW( clAmdFftInitSetupData( setupData.get( ) ), + "clAmdFftInitSetupData failed" ); + + try + { + // Declare the supported options. + po::options_description desc( "clFFT client command line options" ); + desc.add_options() + ( "help,h", "produces this help message" ) + ( "version,v", "Print queryable version information from the clFFT library" ) + ( "clInfo,i", "Print queryable information of the OpenCL runtime" ) + ( "gpu,g", "Force instantiation of an OpenCL GPU device" ) + ( "gpu0", "Force instantiation of an OpenCL GPU device using gpu0" ) + ( "gpu1", "Force instantiation of an OpenCL GPU device using gpu1" ) + ( "gpu2", "Force instantiation of an OpenCL GPU device using gpu2" ) + ( "gpu3", "Force instantiation of an OpenCL GPU device using gpu3" ) + ( "cpu,c", "Force instantiation of an OpenCL CPU device" ) + ( "all,a", "Force instantiation of all OpenCL devices" ) + ( "outPlace,o", "Out of place FFT transform (default: in place)" ) + ( "dumpKernels,d", "FFT engine will dump generated OpenCL FFT kernels to disk (default: dump off)" ) + ( "lenX,x", po::value< size_t >( &clLengths[ 0 ] )->default_value( 1024 ), "Specify the length of the 1st dimension of a test array" ) + ( "lenY,y", po::value< size_t >( &clLengths[ 1 ] )->default_value( 1 ), "Specify the length of the 2nd dimension of a test array" ) + ( "lenZ,z", po::value< size_t >( &clLengths[ 2 ] )->default_value( 1 ), "Specify the length of the 3rd dimension of a test array" ) + ( "batchSize,b", po::value< size_t >( &batchSize )->default_value( 1 ), "If this value is greater than one, arrays will be used " ) + ( "profile,p", po::value< cl_uint >( &profileCount )->default_value( 1 ), "Time and report the kernel speed of the FFT (default: profiling off)" ) + ( "inLayout", po::value< clAmdFftLayout >( &inLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of input data:\n1) interleaved\n2) planar" ) + ( "outLayout", 
po::value< clAmdFftLayout >( &outLayout )->default_value( CLFFT_COMPLEX_INTERLEAVED ), "Layout of output data:\n1) interleaved\n2) planar" ) + ; + + po::variables_map vm; + po::store( po::parse_command_line( argc, argv, desc ), vm ); + po::notify( vm ); + + if( vm.count( "version" ) ) + { + const int indent = countOf( "clFFT client API version: " ); + tout << std::left << std::setw( indent ) << _T( "clFFT client API version: " ) + << clAmdFftVersionMajor << _T( "." ) + << clAmdFftVersionMinor << _T( "." ) + << clAmdFftVersionPatch << std::endl; + + cl_uint libMajor, libMinor, libPatch; + clAmdFftGetVersion( &libMajor, &libMinor, &libPatch ); + + tout << std::left << std::setw( indent ) << _T( "clFFT runtime version: " ) + << libMajor << _T( "." ) + << libMinor << _T( "." ) + << libPatch << std::endl << std::endl; + } + + if( vm.count( "help" ) ) + { + // This needs to be 'cout' as program-options does not support wcout yet + std::cout << desc << std::endl; + return 0; + } + + size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) + | ((vm.count( "gpu0" ) > 0) ? 1 : 0) + | ((vm.count( "gpu1" ) > 0) ? 1 : 0) + | ((vm.count( "gpu2" ) > 0) ? 1 : 0) + | ((vm.count( "gpu3" ) > 0) ? 1 : 0) + | ((vm.count( "cpu" ) > 0) ? 2 : 0) + | ((vm.count( "all" ) > 0) ? 
4 : 0); + if ((mutex & (mutex-1)) != 0) { + terr << _T("You have selected mutually-exclusive OpenCL device options:") << std::endl; + if (vm.count ( "gpu" ) > 0) terr << _T(" gpu,g Force instantiation of an OpenCL GPU device" ) << std::endl; + if (vm.count ( "gpu0" ) > 0) terr << _T(" gpu0 Force instantiation of an OpenCL GPU device using gpu0" ) << std::endl; + if (vm.count ( "gpu1" ) > 0) terr << _T(" gpu1 Force instantiation of an OpenCL GPU device using gpu1" ) << std::endl; + if (vm.count ( "gpu2" ) > 0) terr << _T(" gpu2 Force instantiation of an OpenCL GPU device using gpu2" ) << std::endl; + if (vm.count ( "gpu3" ) > 0) terr << _T(" gpu3 Force instantiation of an OpenCL GPU device using gpu3" ) << std::endl; + if (vm.count ( "cpu" ) > 0) terr << _T(" cpu,c Force instantiation of an OpenCL CPU device" ) << std::endl; + if (vm.count ( "all" ) > 0) terr << _T(" all,a Force instantiation of all OpenCL devices" ) << std::endl; + return 1; + } + + if( vm.count( "gpu" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + deviceGpuList = ~0; + } + if( vm.count( "gpu0" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + deviceGpuList |= 1; + } + if( vm.count( "gpu1" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + deviceGpuList |= 2; + } + if( vm.count( "gpu2" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + deviceGpuList |= 4; + } + if( vm.count( "gpu3" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + deviceGpuList |= 8; + } + + if( vm.count( "cpu" ) ) + { + deviceType = CL_DEVICE_TYPE_CPU; + } + + if( vm.count( "all" ) ) + { + deviceType = CL_DEVICE_TYPE_ALL; + } + + bool printInfo = false; + if( vm.count( "clInfo" ) ) + { + printInfo = true; + } + + if( vm.count( "outPlace" ) ) + { + place = CLFFT_OUTOFPLACE; + } + + if( profileCount > 1 ) + { + commandQueueFlags |= CL_QUEUE_PROFILING_ENABLE; + } + + if( vm.count( "dumpKernels" ) ) + { + setupData->debugFlags |= CLFFT_DUMP_PROGRAMS; + } + + // Our command line does not specify what dimension FFT we wish to transform; we decode + // this 
from the lengths that the user specifies for X, Y, Z. A length of one means that + // The user does not want that dimension. + + for (unsigned u = 0; u < countOf(clLengths); ++u) { + if (0 != clLengths[u]) continue; + clLengths[u] = 1; + } + + dim = CLFFT_1D; + if( clLengths[ 1 ] > 1 ) + { + dim = CLFFT_2D; + } + if( clLengths[ 2 ] > 1 ) + { + dim = CLFFT_3D; + } + + clStrides[ 0 ] = 1; + clStrides[ 1 ] = clStrides[ 0 ] * (clLengths[ 0 ] + clPadding[ 0 ]); + clStrides[ 2 ] = clStrides[ 1 ] * (clLengths[ 1 ] + clPadding[ 1 ]); + clStrides[ 3 ] = clStrides[ 2 ] * (clLengths[ 2 ] + clPadding[ 2 ]); + + fftVectorSize = clLengths[ 0 ] * clLengths[ 1 ] * clLengths[ 2 ]; + fftVectorSizePadded = clStrides[ 3]; + fftBatchSize = fftVectorSizePadded * batchSize; + + switch( outLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + nBuffersOut = 1; + buffSizeBytesOut = fftBatchSize * sizeof( std::complex< float > ); + break; + case CLFFT_COMPLEX_PLANAR: + nBuffersOut = 2; + buffSizeBytesOut = fftBatchSize * sizeof(float); + break; + } + + // Fill our input buffers depending on how we want + switch( inLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + { + // This call creates our openCL context and sets up our devices; expected to throw on error + buffSizeBytesIn = fftBatchSize * sizeof( std::complex< float > ); + + device_id = initializeCL( deviceType, deviceGpuList, context, printInfo ); + createOpenCLCommandQueue( context, + commandQueueFlags, queue, + device_id, + buffSizeBytesIn, 1, clMemBuffersIn, + buffSizeBytesOut, nBuffersOut, clMemBuffersOut); + + std::vector< std::complex< float > > input( fftBatchSize ); + + // impulse test case + for( cl_uint i = 0; i < fftBatchSize; ++i ) + { + input[ i ] = 1; + } + + OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &input[ 0 ], + 0, NULL, &outEvent ), + "clEnqueueWriteBuffer failed" ); + + //for( cl_uint i = 0; i < fftBatchSize; ++i ) + //{ + // input[ i ] = 1.23456f; + //} + + 
//OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytes, &input[ 0 ], + // 0, NULL, &outEvent ), + // "clEnqueueWriteBuffer failed" ); + } + break; + case CLFFT_COMPLEX_PLANAR: + { + // This call creates our openCL context and sets up our devices; expected to throw on error + buffSizeBytesIn = fftBatchSize * sizeof( float ); + + device_id = initializeCL( deviceType, deviceGpuList, context, printInfo ); + createOpenCLCommandQueue( context, + commandQueueFlags, queue, + device_id, + buffSizeBytesIn, 2, clMemBuffersIn, + buffSizeBytesOut, nBuffersOut, clMemBuffersOut); + + // Just initialize the input buffer to all 1's for now + std::vector< float > real( fftBatchSize ); + std::vector< float > imag( fftBatchSize ); + for( cl_uint i = 0; i < fftBatchSize; ++i ) + { + real[ i ] = 1; + imag[ i ] = 0; + } + + OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &real[ 0 ], + 0, NULL, &outEvent ), + "clEnqueueWriteBuffer failed" ); + OPENCL_V_THROW( clEnqueueWriteBuffer( queue, clMemBuffersIn[ 1 ], CL_TRUE, 0, buffSizeBytesIn, &imag[ 0 ], + 0, NULL, &outEvent ), + "clEnqueueWriteBuffer failed" ); + } + break; + default: + { + throw std::runtime_error( "Input layout format not yet supported" ); + } + break; + } + + } + catch( std::exception& e ) + { + terr << _T( "clFFT error condition reported:" ) << std::endl << e.what() << std::endl; + return 1; + } + + // Performance Data + StatisticalTimer& sTimer = StatisticalTimer::getInstance( ); + sTimer.Reserve( 3, profileCount ); + sTimer.setNormalize( true ); + StatisticalTimer::sTimerID clFFTID = sTimer.getUniqueID( "clFFT", 0 ); + + OPENCL_V_THROW( clAmdFftSetup( setupData.get( ) ), "clAmdFftSetup failed" ); + + OPENCL_V_THROW( clAmdFftCreateDefaultPlan( &plHandle, context, dim, clLengths ), "clAmdFftCreateDefaultPlan failed" ); + + // Default plan creates a plan that expects an inPlace transform with interleaved complex numbers + OPENCL_V_THROW( 
clAmdFftSetResultLocation( plHandle, place ), "clAmdFftSetResultLocation failed" ); + OPENCL_V_THROW( clAmdFftSetLayout( plHandle, inLayout, outLayout ), "clAmdFftSetLayout failed" ); + OPENCL_V_THROW( clAmdFftSetPlanBatchSize( plHandle, batchSize ), "clAmdFftSetPlanBatchSize failed" ); + + if ((clPadding[ 0 ] | clPadding[ 1 ] | clPadding[ 2 ]) != 0) { + OPENCL_V_THROW (clAmdFftSetPlanInStride ( plHandle, dim, clStrides ), "clAmdFftSetPlanInStride failed" ); + OPENCL_V_THROW (clAmdFftSetPlanOutStride ( plHandle, dim, clStrides ), "clAmdFftSetPlanOutStride failed" ); + OPENCL_V_THROW (clAmdFftSetPlanDistance ( plHandle, clStrides[ dim ], clStrides[ dim ]), "clAmdFftSetPlanDistance failed" ); + } + + OPENCL_V_THROW( clAmdFftBakePlan( plHandle, 1, &queue, NULL, NULL ), "clAmdFftBakePlan failed" ); + + //get the buffersize + size_t buffersize=0; + OPENCL_V_THROW( clAmdFftGetTmpBufSize(plHandle, &buffersize ), "clAmdFftGetTmpBufSize failed" ); + + //allocate the intermediate buffer + cl_mem clMedBuffer=NULL; + + if (buffersize) + { + cl_int medstatus; + clMedBuffer = clCreateBuffer ( context, CL_MEM_READ_WRITE, buffersize, 0, &medstatus); + OPENCL_V_THROW( medstatus, "Creating intmediate Buffer failed" ); + } + + switch( inLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + case CLFFT_COMPLEX_PLANAR: + break; + default: + // Don't recognize input layout + return CLFFT_INVALID_ARG_VALUE; + } + + switch( outLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + case CLFFT_COMPLEX_PLANAR: + break; + default: + // Don't recognize output layout + return CLFFT_INVALID_ARG_VALUE; + } + + if (( place == CLFFT_INPLACE ) + && ( inLayout != outLayout )) { + switch( inLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + { + assert (CLFFT_COMPLEX_PLANAR == outLayout); + throw std::runtime_error( "Cannot use the same buffer for interleaved->planar in-place transforms" ); + break; + } + case CLFFT_COMPLEX_PLANAR: + { + assert (CLFFT_COMPLEX_INTERLEAVED == outLayout); + throw std::runtime_error( 
"Cannot use the same buffer for planar->interleaved in-place transforms" ); + break; + } + } + } + + // Loop as many times as the user specifies to average out the timings + // + cl_mem * BuffersOut = ( place == CLFFT_INPLACE ) ? NULL : &clMemBuffersOut[ 0 ]; + sTimer.Start(clFFTID); + for( cl_uint i = 0; i < profileCount; ++i ) + { + OPENCL_V_THROW( clAmdFftEnqueueTransform( plHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, &outEvent, + &clMemBuffersIn[ 0 ], BuffersOut, clMedBuffer ), + "clAmdFftEnqueueTransform failed" ); + } + OPENCL_V_THROW( clFinish( queue ), "clFinish failed" ); + sTimer.Stop(clFFTID); + + if( commandQueueFlags & CL_QUEUE_PROFILING_ENABLE ) + { + // Remove all timings that are outside of 3 stddev; we ignore outliers to get a more consistent result + sTimer.pruneOutliers( 3.0 ); + + // windows frequency count is by seconds + double kernelExecTimeNs = sTimer.getAverageTime( clFFTID ) * 1e9/profileCount; + double kernelExecGflops = 5 * fftBatchSize * (log(static_cast<float>(fftVectorSize))/log(2.0f)) / static_cast< double >( kernelExecTimeNs ); + + tout << _T( "FFT kernel execution time < ns >: " ) << commatize ((unsigned long long) kernelExecTimeNs) << std::endl; + tout << _T( "FFT kernel execution Gflops < BatchSize*5*N*log2( N ) >: " ) << kernelExecGflops << std::endl; + } + sTimer.Reset( ); + + // Read and check output data + // This check is not valid if the FFT is executed multiple times inplace. 
+ // + if (( place == CLFFT_OUTOFPLACE ) + || ( profileCount == 1)) + { + bool checkflag= false; + switch( outLayout ) + { + case CLFFT_COMPLEX_INTERLEAVED: + { + std::vector< std::complex< float > > output( fftBatchSize ); + + if( place == CLFFT_INPLACE ) + { + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &output[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + } + else + { + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytesOut, &output[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + } + + //check output data + for( cl_uint i = 0; i < fftBatchSize; ++i ) + { + if (0 == (i % fftVectorSizePadded)) + { + if (output[i].real() != fftVectorSize) + { + checkflag = true; + break; + } + } + else + { + if (output[ i ].real() != 0) + { + checkflag = true; + break; + } + } + + if (output[ i ].imag() != 0) + { + checkflag = true; + break; + } + } + } + break; + case CLFFT_COMPLEX_PLANAR: + { + std::valarray< float > real( fftBatchSize ); + std::valarray< float > imag( fftBatchSize ); + + if( place == CLFFT_INPLACE ) + { + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 0 ], CL_TRUE, 0, buffSizeBytesIn, &real[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersIn[ 1 ], CL_TRUE, 0, buffSizeBytesIn, &imag[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + } + else + { + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 0 ], CL_TRUE, 0, buffSizeBytesOut, &real[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + OPENCL_V_THROW( clEnqueueReadBuffer( queue, clMemBuffersOut[ 1 ], CL_TRUE, 0, buffSizeBytesOut, &imag[ 0 ], + 0, NULL, NULL ), + "Reading the result buffer failed" ); + } + + // Check output data + // The output data might not be contiguous in the output buffer, if there + // is any padding in any dimension, so we need to 
access slices of the buffer. + // We treat the data buffers as arrays of 3D arrays in all cases. + // If this is a 1D test, then + // clLength[ 1 ] and clLength[ 2] will be 1. + // The first element of every 1D slice will be nonzero. + // If this is a 2D test, then + // clLength[ 2 ] will be 1. + // The first element of every 2D slice will be nonzero. + // If this is a 3D test, then + // The first element of every 3D slice will be nonzero. + // + for (unsigned ub = 0; ub < batchSize; ++ub) { + std::slice slice3D (ub * clStrides[ 3 ], clStrides[ 3 ], 1); + std::valarray<float> real3D (real[ slice3D ]); + for (unsigned uz = 0; uz < clLengths[2]; ++uz) { + std::slice slice2D (uz * clStrides[ 2 ], clStrides[ 2 ], 1); + std::valarray<float> real2D (real3D[ slice2D ]); // slice the current batch, not the whole buffer, so batches after the first are actually checked + bool nzZ = (dim == CLFFT_3D) && (0 == uz); + for (unsigned uy = 0; uy < clLengths[1]; ++uy) { + std::slice slice1D (uy * clStrides[ 1], clStrides[ 1], 1); + std::valarray<float> real1D (real2D [ slice1D ]); + bool nzY = (nzZ || (dim == CLFFT_2D)) && (0 == uy); + for (unsigned ux = 0; ux < clLengths[0]; ++ux) { + bool nzX = (nzY || (dim == CLFFT_1D)) && (0 == ux); + float expected = nzX ? 
float (fftVectorSize) : 0.0f; + if (real1D[ux] != expected) + checkflag = true; + } + } + } + } + + ////check output data + //for( cl_uint i = 0; i < fftBatchSize; ++i ) + //{ + // if (0 == (i % fftVectorSizePadded)) + // { + // if (real[i] != fftVectorSize) + // { + // checkflag = true; + // break; + // } + // } + // else + // { + // if (real[i] != 0) + // { + // checkflag = true; + // break; + // } + // } + + // if (imag[i] != 0) + // { + // checkflag = true; + // break; + // } + //} + } + break; + default: + { + throw std::runtime_error( "Input layout format not yet supported" ); + } + break; + } + + if (checkflag) + { + std::cout << "\n\n\t\tClient Test *****FAIL*****" << std::endl; + } + else + { + std::cout << "\n\n\t\tClient Test *****PASS*****" << std::endl; + } + } + + OPENCL_V_THROW( clAmdFftDestroyPlan( &plHandle ), "clAmdFftDestroyPlan failed" ); + OPENCL_V_THROW( clAmdFftTeardown( ), "clAmdFftTeardown failed" ); + + cleanupCL( &context, &queue, countOf( clMemBuffersIn ), clMemBuffersIn, countOf( clMemBuffersOut ), clMemBuffersOut, &outEvent ); + + return 0; +} diff --git a/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.h b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.h new file mode 100644 index 0000000000000000000000000000000000000000..8d4ac80bbcb78849b758e7698b145667368b1cb4 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.client.h @@ -0,0 +1,13 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. 
+//////////////////////////////////////////// + +#pragma once +#if !defined( CLIENT_H ) +#define CLIENT_H + +// Boost headers that we want to use +// #define BOOST_PROGRAM_OPTIONS_DYN_LINK +#include <boost/program_options.hpp> + +#endif diff --git a/RTCP/GPUProc/clAmdFft/samples/clAmdFft.h b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.h new file mode 100644 index 0000000000000000000000000000000000000000..73e65d0d349eeb24d7d27997d78abe43661a244a --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.h @@ -0,0 +1,573 @@ +/*********************************************************************** +** Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. +***********************************************************************/ + +/*! @file clAmdFft.h + * clAmdFft.h defines all of the public interfaces and types that are meant to be used by clFFT clients + * This is the one public header file that should be consumed by clFFT clients. It is written to adhere to native "C" + * interfaces to make clAmdFft library as portable as possible; it should be callable from C, C++, .NET and Fortran, + * either with the proper linking or using wrapper classes. + * + */ + +#pragma once +#if !defined( CLAMDFFT_DOTH ) +#define CLAMDFFT_DOTH + +#if defined(__APPLE__) || defined(__MACOSX) + #include <OpenCL/cl.h> +#else + #include <CL/cl.h> +#endif + +#include "clAmdFft.version.h" + +/*! This preprocessor definition is the standard way of making exporting APIs + * from a DLL simpler. All files within this DLL are compiled with the CLAMDFFT_EXPORTS + * symbol defined on the command line. This symbol should not be defined on any project + * that uses this DLL. This way any other project whose source files include this file see + * clAmdFft functions as being imported from a DLL, whereas this DLL sees symbols + * defined with this macro as being exported. 
+ */ +#if defined( _WIN32 ) + #if !defined( __cplusplus ) + #define inline __inline + #endif + + #if defined( CLAMDFFT_EXPORTS ) + #define CLAMDFFTAPI __declspec( dllexport ) + #else + #define CLAMDFFTAPI __declspec( dllimport ) + #endif +#else + #define CLAMDFFTAPI +#endif + +/* In general, you can not use namespaces for strict C compliance, so we prefix our public accessible names + * with the string clAmdFft + */ + +/* All functions will return pre-defined error codes, and will NOT throw exceptions to the caller + */ + +/*! @brief clAmdFft error codes definition, incorporating OpenCL error definitions + * + * This enumeration is a superset of the OpenCL error codes. For example, CL_OUT_OF_HOST_MEMORY, + * which is defined in cl.h is aliased as CLFFT_OUT_OF_HOST_MEMORY. The set of basic OpenCL + * error codes is extended to add extra values specific to the clAmdFft package. + */ +enum clAmdFftStatus_ +{ + CLFFT_INVALID_GLOBAL_WORK_SIZE = CL_INVALID_GLOBAL_WORK_SIZE, + CLFFT_INVALID_MIP_LEVEL = CL_INVALID_MIP_LEVEL, + CLFFT_INVALID_BUFFER_SIZE = CL_INVALID_BUFFER_SIZE, + CLFFT_INVALID_GL_OBJECT = CL_INVALID_GL_OBJECT, + CLFFT_INVALID_OPERATION = CL_INVALID_OPERATION, + CLFFT_INVALID_EVENT = CL_INVALID_EVENT, + CLFFT_INVALID_EVENT_WAIT_LIST = CL_INVALID_EVENT_WAIT_LIST, + CLFFT_INVALID_GLOBAL_OFFSET = CL_INVALID_GLOBAL_OFFSET, + CLFFT_INVALID_WORK_ITEM_SIZE = CL_INVALID_WORK_ITEM_SIZE, + CLFFT_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE, + CLFFT_INVALID_WORK_DIMENSION = CL_INVALID_WORK_DIMENSION, + CLFFT_INVALID_KERNEL_ARGS = CL_INVALID_KERNEL_ARGS, + CLFFT_INVALID_ARG_SIZE = CL_INVALID_ARG_SIZE, + CLFFT_INVALID_ARG_VALUE = CL_INVALID_ARG_VALUE, + CLFFT_INVALID_ARG_INDEX = CL_INVALID_ARG_INDEX, + CLFFT_INVALID_KERNEL = CL_INVALID_KERNEL, + CLFFT_INVALID_KERNEL_DEFINITION = CL_INVALID_KERNEL_DEFINITION, + CLFFT_INVALID_KERNEL_NAME = CL_INVALID_KERNEL_NAME, + CLFFT_INVALID_PROGRAM_EXECUTABLE = CL_INVALID_PROGRAM_EXECUTABLE, + CLFFT_INVALID_PROGRAM = 
CL_INVALID_PROGRAM, + CLFFT_INVALID_BUILD_OPTIONS = CL_INVALID_BUILD_OPTIONS, + CLFFT_INVALID_BINARY = CL_INVALID_BINARY, + CLFFT_INVALID_SAMPLER = CL_INVALID_SAMPLER, + CLFFT_INVALID_IMAGE_SIZE = CL_INVALID_IMAGE_SIZE, + CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR = CL_INVALID_IMAGE_FORMAT_DESCRIPTOR, + CLFFT_INVALID_MEM_OBJECT = CL_INVALID_MEM_OBJECT, + CLFFT_INVALID_HOST_PTR = CL_INVALID_HOST_PTR, + CLFFT_INVALID_COMMAND_QUEUE = CL_INVALID_COMMAND_QUEUE, + CLFFT_INVALID_QUEUE_PROPERTIES = CL_INVALID_QUEUE_PROPERTIES, + CLFFT_INVALID_CONTEXT = CL_INVALID_CONTEXT, + CLFFT_INVALID_DEVICE = CL_INVALID_DEVICE, + CLFFT_INVALID_PLATFORM = CL_INVALID_PLATFORM, + CLFFT_INVALID_DEVICE_TYPE = CL_INVALID_DEVICE_TYPE, + CLFFT_INVALID_VALUE = CL_INVALID_VALUE, + CLFFT_MAP_FAILURE = CL_MAP_FAILURE, + CLFFT_BUILD_PROGRAM_FAILURE = CL_BUILD_PROGRAM_FAILURE, + CLFFT_IMAGE_FORMAT_NOT_SUPPORTED = CL_IMAGE_FORMAT_NOT_SUPPORTED, + CLFFT_IMAGE_FORMAT_MISMATCH = CL_IMAGE_FORMAT_MISMATCH, + CLFFT_MEM_COPY_OVERLAP = CL_MEM_COPY_OVERLAP, + CLFFT_PROFILING_INFO_NOT_AVAILABLE = CL_PROFILING_INFO_NOT_AVAILABLE, + CLFFT_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, + CLFFT_OUT_OF_RESOURCES = CL_OUT_OF_RESOURCES, + CLFFT_MEM_OBJECT_ALLOCATION_FAILURE = CL_MEM_OBJECT_ALLOCATION_FAILURE, + CLFFT_COMPILER_NOT_AVAILABLE = CL_COMPILER_NOT_AVAILABLE, + CLFFT_DEVICE_NOT_AVAILABLE = CL_DEVICE_NOT_AVAILABLE, + CLFFT_DEVICE_NOT_FOUND = CL_DEVICE_NOT_FOUND, + CLFFT_SUCCESS = CL_SUCCESS, + //-------------------------- Extended status codes for clAmdFft ---------------------------------------- + CLFFT_BUGCHECK = 4*1024, /*!< Bugcheck. */ + CLFFT_NOTIMPLEMENTED, /*!< Functionality is not implemented yet. */ + CLFFT_TRANSPOSED_NOTIMPLEMENTED, /*!< Transposed functionality is not implemented for this transformation. */ + CLFFT_FILE_NOT_FOUND, /*!< Tried to open an existing file on the host system, but failed. */ + CLFFT_FILE_CREATE_FAILURE, /*!< Tried to create a file on the host system, but failed. 
*/ + CLFFT_VERSION_MISMATCH, /*!< Version conflict between client and library. */ + CLFFT_INVALID_PLAN, /*!< Requested plan could not be found. */ + CLFFT_DEVICE_NO_DOUBLE, /*!< Double precision not supported on this device. */ + CLFFT_ENDSTATUS /* This value will always be last, and marks the length of clAmdFftStatus. */ +}; +typedef enum clAmdFftStatus_ clAmdFftStatus; + +/*! @brief The dimension of the input and output buffers that will be fed into all FFT transforms */ +typedef enum clAmdFftDim_ +{ + CLFFT_1D = 1, /*!< 1 Dimensional FFT transform (default). */ + CLFFT_2D, /*!< 2 Dimensional FFT transform. */ + CLFFT_3D, /*!< 3 Dimensional FFT transform. */ + ENDDIMENSION /*!< This value will always be last, and marks the length of clAmdFftDim. */ +} clAmdFftDim; + +/*! @brief What are the expected layouts of the complex numbers <p> + * <b> For Release 1.0,</b> only the CLFFT_COMPLEX_INTERLEAVED and CLFFT_COMPLEX_PLANAR formats are supported. + * The real and hermitian formats should be supported in a future release. + */ +typedef enum clAmdFftLayout_ +{ + CLFFT_COMPLEX_INTERLEAVED = 1, /*!< An array of complex numbers, with real and imaginary components together (default). */ + CLFFT_COMPLEX_PLANAR, /*!< Arrays of real components and arrays of imaginary components that have been separated out. */ + CLFFT_HERMITIAN_INTERLEAVED, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in same array. TODO: Document layout */ + CLFFT_HERMITIAN_PLANAR, /*!< Compressed form of complex numbers; complex-conjugates not stored, real and imaginary components in separate arrays. TODO: Document layout */ + CLFFT_REAL, /*!< An array of real numbers, with no corresponding imaginary components. */ + ENDLAYOUT /*!< This value will always be last, and marks the length of clAmdFftLayout. */ +} clAmdFftLayout; + +/*! @brief What is the expected precision of each FFT.
+ * @ref DistanceStridesandPitches + */ +typedef enum clAmdFftPrecision_ +{ + CLFFT_SINGLE = 1, /*!< An array of complex numbers, with real and imaginary components as floats (default). */ + CLFFT_DOUBLE, /*!< An array of complex numbers, with real and imaginary components as doubles. */ + CLFFT_SINGLE_FAST, /*!< Faster implementation preferred. */ + CLFFT_DOUBLE_FAST, /*!< Faster implementation preferred. */ + ENDPRECISION /*!< This value will always be last, and marks the length of clAmdFftPrecision. */ +} clAmdFftPrecision; + +/*! @brief What is the expected direction of each FFT, time or the frequency domains */ +typedef enum clAmdFftDirection_ +{ + CLFFT_FORWARD = -1, /*!< FFT transform from the time to the frequency domain. */ + CLFFT_BACKWARD = 1, /*!< FFT transform from the frequency to the time domain. */ + CLFFT_MINUS = -1, /*!< Alias for the forward transform. */ + CLFFT_PLUS = 1, /*!< Alias for the backward transform. */ + ENDDIRECTION /*!< This value will always be last, and marks the length of clAmdFftDirection. */ +} clAmdFftDirection; + +/*! @brief Are the input buffers overwritten with the results */ +typedef enum clAmdFftResultLocation_ +{ + CLFFT_INPLACE = 1, /*!< The input and output buffers are the same (default). */ + CLFFT_OUTOFPLACE, /*!< Separate input and output buffers. */ + ENDPLACE /*!< This value will always be last, and marks the length of clAmdFftResultLocation. */ +} clAmdFftResultLocation; + +/*! @brief whether the result will be returned in original order; only valid for dimensions greater than 1 */ +typedef enum clAmdFftResultTransposed_ { + CLFFT_NOTRANSPOSE = 1, /*!< The results are returned in the original preserved order (default) */ + CLFFT_TRANSPOSED, /*!< The result is transposed where transpose kernel is supported (possibly faster) */ + ENDTRANSPOSED /*!< This value will always be last, and marks the length of clAmdFftResultTransposed */ +} clAmdFftResultTransposed; + +/*! 
BitMasks to be used with clAmdFftSetupData.debugFlags */ +#define CLFFT_DUMP_PROGRAMS 0x1 + +/*! @brief Data structure that can be passed to clAmdFftSetup() to control the behavior of the FFT runtime + * @details This structure contains values that can be initialized before instantiation of the FFT runtime + * with ::clAmdFftSetup(). To initialize this structure, pass a pointer to a user struct to ::clAmdFftInitSetupData( ), + * which will clear the structure and set the version member variables to the current values. + */ +struct clAmdFftSetupData_ +{ + cl_uint major; /*!< Major version number of the project; signifies major API changes. */ + cl_uint minor; /*!< Minor version number of the project; minor API changes that could break backwards compatibility. */ + cl_uint patch; /*!< Patch version number of the project; Always incrementing number, signifies change over time. */ + + /*! Bitwise flags that control the behavior of library debug logic. */ + cl_ulong debugFlags; /*! This should be set to zero, except when debugging the clAmdFft library. + * <p> debugFlags can be set to CLFFT_DUMP_PROGRAMS, in which case the dynamically generated OpenCL kernels will + * be written to text files in the current working directory. These files will have a *.cl suffix. + */ +}; +typedef struct clAmdFftSetupData_ clAmdFftSetupData; + +/*! @brief An abstract handle to the object that represents the state of the FFT(s) */ +typedef size_t clAmdFftPlanHandle; + +#ifdef __cplusplus +extern "C" { +#endif + /*! 
@brief Initialize an clAmdFftSetupData struct for the client + * @details clAmdFftSetupData is passed to clAmdFftSetup to control behavior of the FFT runtime + * @param[out] setupData Data structure is cleared, initialized with version information and default values + * @return Enum describing error condition; superset of OpenCL error codes + */ + inline clAmdFftStatus clAmdFftInitSetupData( clAmdFftSetupData* setupData ) + { /* setupData is dereferenced without a NULL check; the caller must pass a valid pointer */ + setupData->major = clAmdFftVersionMajor; + setupData->minor = clAmdFftVersionMinor; + setupData->patch = clAmdFftVersionPatch; + setupData->debugFlags = 0; /* no debug flags (e.g. CLFFT_DUMP_PROGRAMS) set by default */ + + return CLFFT_SUCCESS; /* the only status this function returns */ + } + + /*! @brief Initialize internal FFT resources. + * @details AMD's FFT implementation caches kernels, programs and buffers for its internal use. + * @param[in] setupData Data structure that can be passed into the setup routine to control FFT generation behavior + * and debug functionality + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetup( const clAmdFftSetupData* setupData ); + + /*! @brief Release all internal resources. + * @details Call when client is done with this FFT library, allowing the library to destroy all resources it has cached + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftTeardown( ); + + /*! @brief Query the FFT library for version information + * @details Return the major, minor and patch version numbers associated with this FFT library + * @param[out] major Major functionality change + * @param[out] minor Minor functionality change + * @param[out] patch Bug fixes, documentation changes, no new features introduced + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetVersion( cl_uint* major, cl_uint* minor, cl_uint* patch ); + + /*! @brief Create a plan object initialized entirely with default values.
+ * @details A plan is a repository of state for calculating FFT's. Allows the runtime to pre-calculate kernels, programs + * and buffers and associate them with buffers of specified dimensions. + * @param[out] plHandle Handle to the newly created plan + * @param[in] context Client is responsible for providing an OpenCL context for the plan + * @param[in] dim The dimensionality of the FFT transform; describes how many elements are in the array + * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftCreateDefaultPlan( clAmdFftPlanHandle* plHandle, cl_context context, const clAmdFftDim dim, + const size_t* clLengths ); + + /*! @brief Create a copy of an existing plan. + * @details This API allows a client to create a new plan based upon an existing plan. This is a convenience function + * provided for quickly creating plans that are similar, but may differ slightly. + * @param[out] out_plHandle Handle to the newly created plan that is based on in_plHandle + * @param[in] new_context Client is responsible for providing a new context for the new plan + * @param[in] in_plHandle Handle to a plan to be copied, previously created + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftCopyPlan( clAmdFftPlanHandle* out_plHandle, cl_context new_context, clAmdFftPlanHandle in_plHandle ); + + /*! @brief Prepare the plan for execution. + * @details After all plan parameters are set, the client has the option of 'baking' the plan, which tells the runtime that + * no more changes to the plan's parameters are expected, and the OpenCL kernels should be compiled. This optional function + * allows the client application to perform this function when the application is being initialized instead of on the first + * execution. 
+ * At this point, the clAmdFft runtime will apply all implemented optimizations, possibly including + * running kernel experiments on the devices in the plan context. + * <p> Users should assume that this function will take a long time to execute. If a plan is not baked before being executed, + * users should assume that the first call to clAmdFftEnqueueTransform will take a long time to execute. + * <p> If any significant parameter of a plan is changed after the plan is baked (by a subsequent call to one of + * the clAmdFftSetPlan____ functions), that will not be considered an error. Instead, the plan will revert back to + * the unbaked state, discarding the benefits of the baking operation. + * @param[in] plHandle Handle to a plan previously created + * @param[in] numQueues Number of command queues in commQueueFFT; 0 is a valid value, in which case client does not want + * the runtime to run load experiments and only pre-calculate state information + * @param[in] commQueueFFT An array of cl_command_queues created by the client; the command queues must be a proper subset of + * the devices included in the plan context + * @param[in] pfn_notify A function pointer to a notification routine. The notification routine is a callback function that + * an application can register and which will be called when the program executable has been built (successfully or unsuccessfully) + * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. + * @param[in] user_data Passed as an argument when pfn_notify is called. + * <b> For Release 1.0,</b> this parameter MUST be NULL or nullptr. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftBakePlan( clAmdFftPlanHandle plHandle, cl_uint numQueues, cl_command_queue* commQueueFFT, + void (CL_CALLBACK *pfn_notify)(clAmdFftPlanHandle plHandle, void *user_data), void* user_data ); + + /*! @brief Release the resources of a plan.
+ * @details A plan may include kernels, programs and buffers associated with it that consume memory. When a plan + * is not needed anymore, the client should release the plan. + * @param[in,out] plHandle Handle to a plan previously created + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftDestroyPlan( clAmdFftPlanHandle* plHandle ); + + /*! @brief Retrieve the OpenCL context of a previously created plan. + * @details User should pass a reference to an cl_context variable, which will be changed to point to a + * context set in the specified plan. + * @param[in] plHandle Handle to a plan previously created + * @param[out] context Reference to user allocated cl_context, which will point to context set in plan + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanContext( const clAmdFftPlanHandle plHandle, cl_context* context ); + + /*! @brief Retrieve the floating point precision of the FFT data + * @details User should pass a reference to an clAmdFftPrecision variable, which will be set to the + * precision of the FFT complex data in the plan. + * @param[in] plHandle Handle to a plan previously created + * @param[out] precision Reference to user clAmdFftPrecision enum + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanPrecision( const clAmdFftPlanHandle plHandle, clAmdFftPrecision* precision ); + + /*! @brief Set the floating point precision of the FFT data + * @details Set the plan property which will be the precision of the FFT complex data in the plan. + * @param[in] plHandle Handle to a plan previously created + * @param[in] precision Reference to user clAmdFftPrecision enum <p> + * <b> For Release 1.0,</b> only CLFFT_SINGLE and CLFFT_SINGLE_FAST are supported. 
+ * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanPrecision( clAmdFftPlanHandle plHandle, clAmdFftPrecision precision ); + + /*! @brief Retrieve the scaling factor that should be applied to the FFT data + * @details User should pass a reference to an cl_float variable, which will be set to the + * floating point scaling factor that will be multiplied across the FFT data. + * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Which direction does the scaling factor apply to + * @param[out] scale Reference to user cl_float variable + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanScale( const clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float* scale ); + + /*! @brief Set the scaling factor that should be applied to the FFT data + * @details Set the plan property which will be the floating point scaling factor that will be + * multiplied across the FFT data. + * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Which direction does the scaling factor apply to + * @param[in] scale Reference to user cl_float variable + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanScale( clAmdFftPlanHandle plHandle, clAmdFftDirection dir, cl_float scale ); + + /*! 
@brief Retrieve the number of discrete arrays that this plan can handle concurrently + * @details User should pass a reference to an cl_uint variable, which will be set to the + * number of discrete arrays (1D or 2D) that will be batched together for this plan + * @param[in] plHandle Handle to a plan previously created + * @param[out] batchSize How many discrete number of FFT's are to be performed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanBatchSize( const clAmdFftPlanHandle plHandle, size_t* batchSize ); + + /*! @brief Set the number of discrete arrays that this plan can handle concurrently + * @details Set the plan property which will be set to the number of discrete arrays (1D or 2D) + * that will be batched together for this plan + * @param[in] plHandle Handle to a plan previously created + * @param[in] batchSize How many discrete number of FFT's are to be performed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanBatchSize( clAmdFftPlanHandle plHandle, size_t batchSize ); + + /*! @brief Retrieve the dimensionality of FFT's to be transformed in the plan + * @details Queries a plan object and retrieves the dimensionality that the plan is set for. A size is returned to + * help the client allocate the proper storage to hold the dimensions in a further call to clAmdFftGetPlanLength + * @param[in] plHandle Handle to a plan previously created + * @param[out] dim The dimensionality of the FFT's to be transformed + * @param[out] size Value used to allocate an array to hold the FFT dimensions. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDim( const clAmdFftPlanHandle plHandle, clAmdFftDim* dim, cl_uint* size ); + + /*! 
@brief Set the dimensionality of FFT's to be transformed by the plan + * @details Set the dimensionality of FFT's to be transformed by the plan + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimensionality of the FFT's to be transformed + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDim( clAmdFftPlanHandle plHandle, const clAmdFftDim dim ); + + /*! @brief Retrieve the length of each dimension of the FFT + * @details User should pass a reference to a size_t array, which will be set to the + * length of each discrete dimension of the FFT + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the length parameters; describes how many elements are in the array + * @param[out] clLengths An array of lengths, of size 'dim'. Each array value describes the length of each dimension + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanLength( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clLengths ); + + /*! @brief Set the length of each dimension of the FFT + * @details Set the plan property which will be the length of each discrete dimension of the FFT + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the length parameters; describes how many elements are in the array + * @param[in] clLengths An array of lengths, of size 'dim'. Each value describes the length of additional dimensions + * <p><b> For Release 1.0, </b> All lengths must be powers of 2. Non-power-of-two dimensions should be supported in a future release. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanLength( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, const size_t* clLengths ); + + /*! 
@brief Retrieve the distance between consecutive elements for input buffers in a dimension. + * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely + * ignored + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[out] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanInStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Set the distance between consecutive elements for input buffers in a dimension. + * @details Set the plan properties which will be the distance between elements in a given dimension + * (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[in] clStrides An array of strides, of size 'dim'. + * See @ref DistanceStridesandPitches for details. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanInStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Retrieve the distance between consecutive elements for output buffers in a dimension. + * @details Depending on how the dimension is set in the plan (for 2D or 3D FFT's), strideY or strideZ can be safely + * ignored + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[out] clStrides An array of strides, of size 'dim'. 
Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanOutStride( const clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Set the distance between consecutive elements for output buffers in a dimension. + * @details Set the plan properties which will be the distance between elements in a given dimension + * (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] dim The dimension of the stride parameters; describes how many elements are in the array + * @param[in] clStrides An array of strides, of size 'dim'. Usually strideX=1 so that successive elements in the first dimension are stored contiguously. + * Typically strideY=LenX, strideZ=LenX*LenY such that successive elements in the second and third dimensions are stored contiguously. + * @sa clAmdFftSetPlanInStride + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanOutStride( clAmdFftPlanHandle plHandle, const clAmdFftDim dim, size_t* clStrides ); + + /*! @brief Retrieve the distance between Array objects + * @details Pitch is the distance between each discrete array object in an FFT array. This is only used + * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[out] iDist The distance between the beginning elements of the discrete array objects in memory on input. + * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) + * @param[out] oDist The distance between the beginning elements of the discrete array objects in memory on output.
+ * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanDistance( const clAmdFftPlanHandle plHandle, size_t* iDist, size_t* oDist ); + + /*! @brief Set the distance between Array objects + * @details Pitch is the distance between each discrete array object in an FFT array. This is only used + * for 'array' dimensions in clAmdFftDim; see clAmdFftSetPlanDim (units are in terms of clAmdFftPrecision) + * @param[in] plHandle Handle to a plan previously created + * @param[in] iDist The distance between the beginning elements of the discrete array objects in memory on input. + * For contiguous arrays in memory, iDist=(strideX*strideY*strideZ) + * @param[in] oDist The distance between the beginning elements of the discrete array objects in memory on output. + * For contiguous arrays in memory, oDist=(strideX*strideY*strideZ) + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanDistance( clAmdFftPlanHandle plHandle, size_t iDist, size_t oDist ); + + /*! @brief Retrieve the expected layout of the input and output buffers + * @details Output buffers can be filled with either hermitian or complex numbers. Complex numbers can be stored + * in various layouts; this informs the FFT engine what layout to produce on output + * @param[in] plHandle Handle to a plan previously created + * @param[out] iLayout Indicates how the input buffers are laid out in memory + * @param[out] oLayout Indicates how the output buffers are laid out in memory + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetLayout( const clAmdFftPlanHandle plHandle, clAmdFftLayout* iLayout, clAmdFftLayout* oLayout ); + + /*! @brief Set the expected layout of the input and output buffers + * @details Output buffers can be filled with either hermitian or complex numbers.
Complex numbers can be stored + * in various layouts; this informs the FFT engine what layout to produce on output + * @param[in] plHandle Handle to a plan previously created + * @param[in] iLayout Indicates how the input buffers are laid out in memory + * @param[in] oLayout Indicates how the output buffers are laid out in memory + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetLayout( clAmdFftPlanHandle plHandle, clAmdFftLayout iLayout, clAmdFftLayout oLayout ); + + /*! @brief Retrieve whether the input buffers are going to be overwritten with results + * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the + * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers + * on the Enqueue call. + * @param[in] plHandle Handle to a plan previously created + * @param[out] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetResultLocation( const clAmdFftPlanHandle plHandle, clAmdFftResultLocation* placeness ); + + /*! @brief Set whether the input buffers are going to be overwritten with results + * @details If the setting is to do an in-place transform, the input buffers are overwritten with the results of the + * transform. If the setting is for out-of-place transforms, the engine knows to look for separate output buffers + * on the Enqueue call. + * @param[in] plHandle Handle to a plan previously created + * @param[in] placeness Tells the FFT engine to clobber the input buffers or to expect output buffers for results + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetResultLocation( clAmdFftPlanHandle plHandle, clAmdFftResultLocation placeness ); + + /*! @brief Retrieve the final transpose setting of a multi-dimensional FFT + * @details A multi-dimensional FFT typically transposes the data several times during calculation.
If the client + * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped + * for possible speed improvements + * @param[in] plHandle Handle to a plan previously created + * @param[out] transposed Parameter specifies whether the final transpose can be skipped + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetPlanTransposeResult( const clAmdFftPlanHandle plHandle, clAmdFftResultTransposed * transposed ); + + /*! @brief Set the final transpose setting of a multi-dimensional FFT + * @details A multi-dimensional FFT typically transposes the data several times during calculation. If the client + * does not care about the final transpose to put data back in proper dimension, the final transpose can be skipped + * for possible speed improvements + * @param[in] plHandle Handle to a plan previously created + * @param[in] transposed Parameter specifies whether the final transpose can be skipped + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftSetPlanTransposeResult( clAmdFftPlanHandle plHandle, clAmdFftResultTransposed transposed ); + + + /*! @brief Get buffer size (in bytes), which may be needed internally for an intermediate buffer + * @details Very large FFT transforms may need multiple passes, and the operation would need a temporary buffer to hold + * intermediate results. This function is only valid after the plan is baked, otherwise an invalid operation error + * is returned. If buffersize returns as 0, the runtime needs no temporary buffer. + * @param[in] plHandle Handle to a plan previously created + * @param[out] buffersize Size in bytes for intermediate buffer + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftGetTmpBufSize( const clAmdFftPlanHandle plHandle, size_t* buffersize ); + + /*! 
@brief Enqueue an FFT transform operation, and return immediately (non-blocking) + * @details This transform API is specific to the interleaved complex format, taking an input buffer with real and imaginary + * components paired together, and outputting the results into an output buffer in the same format + * @param[in] plHandle Handle to a plan previously created + * @param[in] dir Forwards or backwards transform + * @param[in] numQueuesAndEvents Number of command queues in commQueues; number of expected events to be returned in outEvents + * @param[in] commQueues An array of cl_command_queues created by the client; the command queues must be a proper subset of + * the devices included in the plan context + * @param[in] numWaitEvents Specify the number of elements in the eventWaitList array + * @param[in] waitEvents Events that this transform should wait to complete before executing on the device + * @param[out] outEvents The runtime fills this array with events corresponding 1 to 1 with the input command queues passed + * in commQueues. This parameter can be NULL or nullptr, in which case client is not interested in receiving notifications + * when transforms are finished, otherwise if not NULL the client is responsible for allocating this array, with at least + * as many elements as specified in numQueuesAndEvents. + * @param[in] inputBuffers An array of cl_mem objects that contain data for processing by the FFT runtime. If the transform + * is in place, the FFT results will overwrite the input buffers + * @param[out] outputBuffers An array of cl_mem objects that will store the results of out of place transforms. If the transform + * is in place, this parameter may be NULL or nullptr. It is completely ignored + * @param[in] tmpBuffer A cl_mem object that is reserved as a temporary buffer for FFT processing. 
If clTmpBuffers is NULL or nullptr, + * and the runtime needs temporary storage, an internal temporary buffer will be created on the fly managed by the runtime. + * @return Enum describing error condition; superset of OpenCL error codes + */ + CLAMDFFTAPI clAmdFftStatus clAmdFftEnqueueTransform( + clAmdFftPlanHandle plHandle, + clAmdFftDirection dir, + cl_uint numQueuesAndEvents, + cl_command_queue* commQueues, + cl_uint numWaitEvents, + const cl_event* waitEvents, + cl_event* outEvents, + cl_mem* inputBuffers, + cl_mem* outputBuffers, + cl_mem tmpBuffer + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.cpp b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.cpp new file mode 100644 index 0000000000000000000000000000000000000000..5cba77240f0a93dda5a945efe34957a1a814c67f --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.cpp @@ -0,0 +1,522 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. 
+//////////////////////////////////////////// + +// clAmdFft.opencl.cpp : Provides functions to set up openCL +// + +#include "stdafx.h" +#include <stdexcept> +#include <iomanip> +#include <sstream> +#include <cstring> +#include <vector> +#include "clAmdFft.h" +#include "clAmdFft.openCL.h" + +void prettyPrintPlatformInfo( const cl_platform_id& pId ) +{ + size_t platformProfileSize = 0; + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, 0, NULL, &platformProfileSize ), + "Getting CL_PLATFORM_PROFILE Platform Info string size ( ::clGetPlatformInfo() )" ); + + std::vector< char > szPlatformProfile( platformProfileSize ); + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_PROFILE, platformProfileSize, &szPlatformProfile[ 0 ], NULL), + "Getting CL_PLATFORM_PROFILE Platform Info string ( ::clGetPlatformInfo() )" ); + + size_t platformVersionSize = 0; + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, 0, NULL, &platformVersionSize ), + "Getting CL_PLATFORM_VERSION Platform Info string size ( ::clGetPlatformInfo() )" ); + + std::vector< char > szPlatformVersion( platformVersionSize ); + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VERSION, platformVersionSize, &szPlatformVersion[ 0 ], NULL), + "Getting CL_PLATFORM_VERSION Platform Info string ( ::clGetPlatformInfo() )" ); + + size_t platformNameSize = 0; + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, 0, NULL, &platformNameSize ), + "Getting CL_PLATFORM_NAME Platform Info string size ( ::clGetPlatformInfo() )" ); + + std::vector< char > szPlatformName( platformNameSize ); + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_NAME, platformNameSize, &szPlatformName[ 0 ], NULL), + "Getting CL_PLATFORM_NAME Platform Info string ( ::clGetPlatformInfo() )" ); + + size_t vendorStringSize = 0; + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, 0, NULL, &vendorStringSize ), + "Getting CL_PLATFORM_VENDOR Platform Info string size ( ::clGetPlatformInfo() )" ); + 
+ std::vector< char > szPlatformVendor( vendorStringSize ); + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_VENDOR, vendorStringSize, &szPlatformVendor[ 0 ], NULL), + "Getting CL_PLATFORM_VENDOR Platform Info string ( ::clGetPlatformInfo() )" ); + + size_t platformExtensionsSize = 0; + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, 0, NULL, &platformExtensionsSize ), + "Getting CL_PLATFORM_EXTENSIONS Platform Info string size ( ::clGetPlatformInfo() )" ); + + std::vector< char > szPlatformExtensions( platformExtensionsSize ); + OPENCL_V_THROW( ::clGetPlatformInfo( pId, CL_PLATFORM_EXTENSIONS, platformExtensionsSize, &szPlatformExtensions[ 0 ], NULL), + "Getting CL_PLATFORM_EXTENSIONS Platform Info string ( ::clGetPlatformInfo() )" ); + + const int indent = countOf( " CL_PLATFORM_EXTENSIONS: " ); + std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_PROFILE: " << &szPlatformProfile[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VERSION: " << &szPlatformVersion[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_NAME: " << &szPlatformName[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_VENDOR: " << &szPlatformVendor[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_PLATFORM_EXTENSIONS: " << &szPlatformExtensions[ 0 ] << std::endl; + std::cout << std::right << std::endl; +} + +void prettyPrintDeviceInfo( const cl_device_id& dId ) +{ + size_t deviceNameSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, 0, NULL, &deviceNameSize ), + "Getting CL_DEVICE_NAME Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szDeviceName( deviceNameSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_NAME, deviceNameSize, &szDeviceName[ 0 ], NULL ), + "Getting CL_DEVICE_NAME Platform Info string ( ::clGetDeviceInfo() )" ); + + size_t deviceVersionSize = 0; + OPENCL_V_THROW( 
::clGetDeviceInfo( dId, CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ), + "Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szDeviceVersion( deviceVersionSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ), + "Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); + + size_t driverVersionSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, 0, NULL, &driverVersionSize ), + "Getting CL_DRIVER_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szDriverVersion( driverVersionSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DRIVER_VERSION, driverVersionSize, &szDriverVersion[ 0 ], NULL ), + "Getting CL_DRIVER_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); + + size_t openCLVersionSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &openCLVersionSize ), + "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szOpenCLVersion( openCLVersionSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_OPENCL_C_VERSION, openCLVersionSize, &szOpenCLVersion[ 0 ], NULL ), + "Getting CL_DEVICE_OPENCL_C_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); + + cl_device_type devType = CL_DEVICE_TYPE_DEFAULT; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_TYPE, sizeof( cl_device_type ), &devType, NULL ), + "Getting CL_DEVICE_TYPE device info ( ::clGetDeviceInfo() )" ); + + cl_uint devAddrBits = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_ADDRESS_BITS, sizeof( cl_uint ), &devAddrBits, NULL ), + "Getting CL_DEVICE_ADDRESS_BITS device info ( ::clGetDeviceInfo() )" ); + + cl_uint maxClockFreq = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof( cl_uint ), &maxClockFreq, NULL ), + "Getting CL_DEVICE_MAX_CLOCK_FREQUENCY device info ( 
::clGetDeviceInfo() )" ); + + cl_bool devAvailable = CL_FALSE; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_AVAILABLE, sizeof( cl_bool ), &devAvailable, NULL ), + "Getting CL_DEVICE_AVAILABLE device info ( ::clGetDeviceInfo() )" ); + + cl_bool devCompAvailable = CL_FALSE; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_COMPILER_AVAILABLE, sizeof( cl_bool ), &devCompAvailable, NULL ), + "Getting CL_DEVICE_COMPILER_AVAILABLE device info ( ::clGetDeviceInfo() )" ); + + size_t devMaxWorkGroup = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( size_t ), &devMaxWorkGroup, NULL ), + "Getting CL_DEVICE_MAX_WORK_GROUP_SIZE device info ( ::clGetDeviceInfo() )" ); + + cl_uint devMaxWorkItemDim = CL_FALSE; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( cl_uint ), &devMaxWorkItemDim, NULL ), + "Getting CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS device info ( ::clGetDeviceInfo() )" ); + + std::vector< size_t > devMaxWorkItemSizes( devMaxWorkItemDim ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof( size_t )*devMaxWorkItemSizes.size( ), &devMaxWorkItemSizes[0], NULL), + "Getting CL_DEVICE_MAX_WORK_ITEM_SIZES device info ( ::clGetDeviceInfo() )" ); + + cl_bool deviceHostUnified = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof( cl_bool ), &deviceHostUnified, NULL ), + "Getting CL_DEVICE_HOST_UNIFIED_MEMORY Platform Info string ( ::clGetDeviceInfo() )" ); + + cl_ulong devMaxConstantBuffer = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( cl_ulong ), &devMaxConstantBuffer, NULL ), + "Getting CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE device info ( ::clGetDeviceInfo() )" ); + + cl_ulong devLocalMemSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( cl_ulong ), &devLocalMemSize, NULL ), + "Getting CL_DEVICE_LOCAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" ); + + cl_ulong 
deviceGlobalMemSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( cl_ulong ), &deviceGlobalMemSize, NULL ), + "Getting CL_DEVICE_GLOBAL_MEM_SIZE device info ( ::clGetDeviceInfo() )" ); + + cl_ulong deviceMaxMemAllocSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &deviceMaxMemAllocSize, NULL ), + "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" ); + + size_t deviceExtSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ), + "Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szDeviceExt( deviceExtSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( dId, CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ), + "Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )" ); + + const int indent = countOf( " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " ); + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_NAME: " << &szDeviceName[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_VERSION: " << &szDeviceVersion[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DRIVER_VERSION: " << &szDriverVersion[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_TYPE: " + << (CL_DEVICE_TYPE_DEFAULT & devType ? "default" : "") + << (CL_DEVICE_TYPE_CPU & devType ? "CPU" : "") + << (CL_DEVICE_TYPE_GPU & devType ? "GPU" : "") + << (CL_DEVICE_TYPE_ACCELERATOR & devType ? "Accelerator" : "") + << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CLOCK_FREQUENCY: " << maxClockFreq << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_ADDRESS_BITS: " << devAddrBits << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_AVAILABLE: " << ( devAvailable ? 
"TRUE": "FALSE") << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_COMPILER_AVAILABLE: " << ( devCompAvailable ? "TRUE": "FALSE") << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_OPENCL_C_VERSION: " << &szOpenCLVersion[ 0 ] << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_GROUP_SIZE: " << devMaxWorkGroup << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: " << devMaxWorkItemDim << std::endl; + for( cl_uint wis = 0; wis < devMaxWorkItemSizes.size( ); ++wis ) + { + std::stringstream dimString; + dimString << "Dimension[ " << wis << " ] "; + std::cout << std::right << std::setw( indent ) << dimString.str( ) << devMaxWorkItemSizes[wis] << std::endl; + } + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_HOST_UNIFIED_MEMORY: " << ( deviceHostUnified ? "TRUE": "FALSE") << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: " << devMaxConstantBuffer; + std::cout << " ( " << devMaxConstantBuffer / 1024 << " KB )" << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_LOCAL_MEM_SIZE: " << devLocalMemSize; + std::cout << " ( " << devLocalMemSize / 1024 << " KB )" << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_GLOBAL_MEM_SIZE: " << deviceGlobalMemSize; + std::cout << " ( " << deviceGlobalMemSize / 1048576 << " MB )" << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_MAX_MEM_ALLOC_SIZE: " << deviceMaxMemAllocSize; + std::cout << " ( " << deviceMaxMemAllocSize / 1048576 << " MB )" << std::endl; + std::cout << std::left << std::setw( indent ) << " CL_DEVICE_EXTENSIONS: " << &szDeviceExt[ 0 ] << std::endl; + + std::cout << std::right << std::endl; +} + +// Verify a failed condition; return true on fail +inline cl_bool OPENCL_V_FAIL( cl_int res ) +{ + if( res == CL_SUCCESS ) + return CL_FALSE; + else + 
return CL_TRUE; +} + +std::string prettyPrintclFFTStatus( const cl_int& status ) +{ + switch( status ) + { + case CLFFT_INVALID_GLOBAL_WORK_SIZE: + return "CLFFT_INVALID_GLOBAL_WORK_SIZE"; + case CLFFT_INVALID_MIP_LEVEL: + return "CLFFT_INVALID_MIP_LEVEL"; + case CLFFT_INVALID_BUFFER_SIZE: + return "CLFFT_INVALID_BUFFER_SIZE"; + case CLFFT_INVALID_GL_OBJECT: + return "CLFFT_INVALID_GL_OBJECT"; + case CLFFT_INVALID_OPERATION: + return "CLFFT_INVALID_OPERATION"; + case CLFFT_INVALID_EVENT: + return "CLFFT_INVALID_EVENT"; + case CLFFT_INVALID_EVENT_WAIT_LIST: + return "CLFFT_INVALID_EVENT_WAIT_LIST"; + case CLFFT_INVALID_GLOBAL_OFFSET: + return "CLFFT_INVALID_GLOBAL_OFFSET"; + case CLFFT_INVALID_WORK_ITEM_SIZE: + return "CLFFT_INVALID_WORK_ITEM_SIZE"; + case CLFFT_INVALID_WORK_GROUP_SIZE: + return "CLFFT_INVALID_WORK_GROUP_SIZE"; + case CLFFT_INVALID_WORK_DIMENSION: + return "CLFFT_INVALID_WORK_DIMENSION"; + case CLFFT_INVALID_KERNEL_ARGS: + return "CLFFT_INVALID_KERNEL_ARGS"; + case CLFFT_INVALID_ARG_SIZE: + return "CLFFT_INVALID_ARG_SIZE"; + case CLFFT_INVALID_ARG_VALUE: + return "CLFFT_INVALID_ARG_VALUE"; + case CLFFT_INVALID_ARG_INDEX: + return "CLFFT_INVALID_ARG_INDEX"; + case CLFFT_INVALID_KERNEL: + return "CLFFT_INVALID_KERNEL"; + case CLFFT_INVALID_KERNEL_DEFINITION: + return "CLFFT_INVALID_KERNEL_DEFINITION"; + case CLFFT_INVALID_KERNEL_NAME: + return "CLFFT_INVALID_KERNEL_NAME"; + case CLFFT_INVALID_PROGRAM_EXECUTABLE: + return "CLFFT_INVALID_PROGRAM_EXECUTABLE"; + case CLFFT_INVALID_PROGRAM: + return "CLFFT_INVALID_PROGRAM"; + case CLFFT_INVALID_BUILD_OPTIONS: + return "CLFFT_INVALID_BUILD_OPTIONS"; + case CLFFT_INVALID_BINARY: + return "CLFFT_INVALID_BINARY"; + case CLFFT_INVALID_SAMPLER: + return "CLFFT_INVALID_SAMPLER"; + case CLFFT_INVALID_IMAGE_SIZE: + return "CLFFT_INVALID_IMAGE_SIZE"; + case CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR: + return "CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR"; + case CLFFT_INVALID_MEM_OBJECT: + return "CLFFT_INVALID_MEM_OBJECT"; + 
case CLFFT_INVALID_HOST_PTR: + return "CLFFT_INVALID_HOST_PTR"; + case CLFFT_INVALID_COMMAND_QUEUE: + return "CLFFT_INVALID_COMMAND_QUEUE"; + case CLFFT_INVALID_QUEUE_PROPERTIES: + return "CLFFT_INVALID_QUEUE_PROPERTIES"; + case CLFFT_INVALID_CONTEXT: + return "CLFFT_INVALID_CONTEXT"; + case CLFFT_INVALID_DEVICE: + return "CLFFT_INVALID_DEVICE"; + case CLFFT_INVALID_PLATFORM: + return "CLFFT_INVALID_PLATFORM"; + case CLFFT_INVALID_DEVICE_TYPE: + return "CLFFT_INVALID_DEVICE_TYPE"; + case CLFFT_INVALID_VALUE: + return "CLFFT_INVALID_VALUE"; + case CLFFT_MAP_FAILURE: + return "CLFFT_MAP_FAILURE"; + case CLFFT_BUILD_PROGRAM_FAILURE: + return "CLFFT_BUILD_PROGRAM_FAILURE"; + case CLFFT_IMAGE_FORMAT_NOT_SUPPORTED: + return "CLFFT_IMAGE_FORMAT_NOT_SUPPORTED"; + case CLFFT_IMAGE_FORMAT_MISMATCH: + return "CLFFT_IMAGE_FORMAT_MISMATCH"; + case CLFFT_MEM_COPY_OVERLAP: + return "CLFFT_MEM_COPY_OVERLAP"; + case CLFFT_PROFILING_INFO_NOT_AVAILABLE: + return "CLFFT_PROFILING_INFO_NOT_AVAILABLE"; + case CLFFT_OUT_OF_HOST_MEMORY: + return "CLFFT_OUT_OF_HOST_MEMORY"; + case CLFFT_OUT_OF_RESOURCES: + return "CLFFT_OUT_OF_RESOURCES"; + case CLFFT_MEM_OBJECT_ALLOCATION_FAILURE: + return "CLFFT_MEM_OBJECT_ALLOCATION_FAILURE"; + case CLFFT_COMPILER_NOT_AVAILABLE: + return "CLFFT_COMPILER_NOT_AVAILABLE"; + case CLFFT_DEVICE_NOT_AVAILABLE: + return "CLFFT_DEVICE_NOT_AVAILABLE"; + case CLFFT_DEVICE_NOT_FOUND: + return "CLFFT_DEVICE_NOT_FOUND"; + case CLFFT_SUCCESS: + return "CLFFT_SUCCESS"; + case CLFFT_NOTIMPLEMENTED: + return "CLFFT_NOTIMPLEMENTED"; + case CLFFT_FILE_NOT_FOUND: + return "CLFFT_FILE_NOT_FOUND"; + case CLFFT_FILE_CREATE_FAILURE: + return "CLFFT_FILE_CREATE_FAILURE"; + case CLFFT_VERSION_MISMATCH: + return "CLFFT_VERSION_MISMATCH"; + case CLFFT_INVALID_PLAN: + return "CLFFT_INVALID_PLAN"; + default: + return "Error code not defined"; + break; + } +} + +std::vector< cl_device_id > initializeCL( cl_device_type deviceType, + cl_uint deviceGpuList, + cl_context& context, + bool 
printclInfo ) +{ + cl_int status = 0; + + /* + * Have a look at the available platforms and pick either + * the AMD one if available or a reasonable default. + */ + + cl_uint numPlatforms = 0; + cl_platform_id platform = NULL; + OPENCL_V_THROW( ::clGetPlatformIDs( 0, NULL, &numPlatforms ), + "Getting number of platforms( ::clGetPlatformsIDs() )" ); + + if( numPlatforms > 0 ) + { + std::vector< cl_platform_id > platforms( numPlatforms ); + OPENCL_V_THROW( ::clGetPlatformIDs( numPlatforms, &platforms[ 0 ], NULL ), + "Getting Platform Id's ( ::clGetPlatformsIDs() )" ); + + // TODO: How should we determine what platform to choose? We are just defaulting to the last one reported, as we + // print out the info + for( unsigned int i=0; i < numPlatforms; ++i ) + { + if( printclInfo ) + { + std::cout << "OpenCL platform [ " << i << " ]:" << std::endl; + prettyPrintPlatformInfo( platforms[i] ); + } + + platform = platforms[i]; + } + } + + if( NULL == platform ) + { + throw std::runtime_error( "No appropriate OpenCL platform could be found" ); + } + + /* + * If we could find our platform, use it. Otherwise use just available platform. + */ + + // Get the device list for this type. 
+ // + cl_uint num_devices = 0; + OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, 0, NULL, &num_devices ), + "Getting OpenCL devices ( ::clGetDeviceIDs() )" ); + if( 0 == num_devices ) + { + OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available"); + } + + std::vector< cl_device_id > deviceIDs( num_devices ); + OPENCL_V_THROW( ::clGetDeviceIDs( platform, deviceType, num_devices, &deviceIDs[0], NULL), + "Getting OpenCL deviceIDs ( ::clGetDeviceIDs() )" ); + + if( (CL_DEVICE_TYPE_GPU == deviceType) && (~cl_uint(0) != deviceGpuList) ) + { + // The command line options specify to user certain gpu(s) + // + for( unsigned u = (unsigned) deviceIDs.size(); u-- > 0; ) + { + if( 0 != (deviceGpuList & (1<<u) ) ) + continue; + + // Remove this GPU from the list + deviceIDs[u] = deviceIDs.back(); + deviceIDs.pop_back(); + } + } + + if( 0 == deviceIDs.size( ) ) + { + OPENCL_V_THROW( CLFFT_DEVICE_NOT_AVAILABLE, "No devices available"); + } + + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 }; + + ///////////////////////////////////////////////////////////////// + // Create an OpenCL context + ///////////////////////////////////////////////////////////////// + context = clCreateContext( cps, + (cl_uint) deviceIDs.size(), + & deviceIDs[0], + NULL, + NULL, + &status); + OPENCL_V_THROW( status, "Creating Context ( ::clCreateContextFromType() )" ); + + /* First, get the size of device list data */ + size_t deviceListSize; + OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 0, NULL, &deviceListSize ), + "Getting device array size ( ::clGetContextInfo() )" ); + + ///////////////////////////////////////////////////////////////// + // Detect OpenCL devices + ///////////////////////////////////////////////////////////////// + std::vector< cl_device_id > devices( deviceListSize/sizeof( cl_device_id ) ); + + /* Now, get the device list data */ + OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_DEVICES, 
deviceListSize, &devices[ 0 ], NULL ), + "Getting device array ( ::clGetContextInfo() )" ); + + if( printclInfo ) + { + cl_uint cContextDevices = 0; + + size_t deviceVersionSize = 0; + OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, 0, NULL, &deviceVersionSize ), + "Getting CL_DEVICE_VERSION Platform Info string size ( ::clGetDeviceInfo() )" ); + + std::vector< char > szDeviceVersion( deviceVersionSize ); + OPENCL_V_THROW( ::clGetDeviceInfo( devices[0], CL_DEVICE_VERSION, deviceVersionSize, &szDeviceVersion[ 0 ], NULL ), + "Getting CL_DEVICE_VERSION Platform Info string ( ::clGetDeviceInfo() )" ); + + char openclstr[11]="OpenCL 1.0"; + + if (!strncmp((const char*)&szDeviceVersion[ 0 ], openclstr, 10)) + { + cContextDevices = 1; + } + else + { + OPENCL_V_THROW( ::clGetContextInfo( context, CL_CONTEXT_NUM_DEVICES, sizeof( cContextDevices ), &cContextDevices, NULL ), + "Getting number of context devices ( ::clGetContextInfo() )" ); + } + + for( cl_uint i = 0; i < cContextDevices; ++i ) + { + std::cout << "OpenCL devices [ " << i << " ]:" << std::endl; + prettyPrintDeviceInfo( devices[i] ); + } + } + + return devices; +} + +int cleanupCL( cl_context* context, cl_command_queue* commandQueue, + const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent ) +{ + if( *outEvent != NULL ) + OPENCL_V_THROW( clReleaseEvent( *outEvent ), "Error: In clReleaseEvent\n" ); + + releaseOpenCLMemBuffer( numBuffersIn, inputBuffer); + releaseOpenCLMemBuffer( numBuffersOut, outputBuffer); + + if( *commandQueue != NULL ) + OPENCL_V_THROW( clReleaseCommandQueue( *commandQueue ), "Error: In clReleaseCommandQueue\n" ); + + if( *context != NULL ) + OPENCL_V_THROW( clReleaseContext( *context ), "Error: In clReleaseContext\n" ); + + return 0; +} + +int createOpenCLMemoryBuffer( cl_context& context, const size_t bufferSizeBytes, const cl_uint numBuffers, cl_mem buffer[], cl_mem_flags accessibility) { + cl_int status = 
0; + + for( cl_uint i = 0; i < numBuffers; ++i ) + { + buffer[ i ] = ::clCreateBuffer( context, accessibility, bufferSizeBytes, NULL, &status); + OPENCL_V_THROW( status, "Creating Buffer ( ::clCreateBuffer() )" ); + } + + return 0; +} + +int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[]) +{ + for( cl_uint i = 0; i < numBuffers; ++i ) + { + if( buffer[ i ] != NULL ) + OPENCL_V_THROW( clReleaseMemObject( buffer[ i ] ), "Error: In clReleaseMemObject\n" ); + } + + return 0; +} + +void createOpenCLCommandQueue( cl_context& context, + cl_uint commandQueueFlags, + cl_command_queue& commandQueue, + std::vector< cl_device_id > devices, + const size_t bufferSizeBytesIn, + const cl_uint numBuffersIn, + cl_mem clMemBufferIn[], + const size_t bufferSizeBytesOut, + const cl_uint numBuffersOut, + cl_mem clMemBufferOut[] ) +{ + cl_int status = 0; + commandQueue = ::clCreateCommandQueue( context, devices[0], commandQueueFlags, &status ); + OPENCL_V_THROW( status, "Creating Command Queue ( ::clCreateCommandQueue() )" ); + + createOpenCLMemoryBuffer( context, bufferSizeBytesIn, numBuffersIn, clMemBufferIn, CL_MEM_READ_WRITE); + createOpenCLMemoryBuffer( context, bufferSizeBytesOut, numBuffersOut, clMemBufferOut, CL_MEM_READ_WRITE); +} + diff --git a/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.h b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.h new file mode 100644 index 0000000000000000000000000000000000000000..cb82e024b7ac0d442e42d5d6eac645f0b3fbf408 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clAmdFft.openCL.h @@ -0,0 +1,97 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. 
+//////////////////////////////////////////// + +#pragma once +#if !defined( AMD_OPENCL_H ) +#define AMD_OPENCL_H +#include <memory> +#include <stdexcept> +#include "amd-unicode.h" + +// Creating a portable defintion of countof +#if defined( _WIN32 ) + #define countOf _countof +#else + #define countOf( arr ) ( sizeof( arr ) / sizeof( arr[ 0 ] ) ) +#endif + +/* + * \brief OpenCL related initialization (ripped from AMD stream sample code ) + * Create Context, Device list + * Load CL file, compile, link CL source + * Build program and kernel objects + */ +std::vector< cl_device_id > initializeCL( cl_device_type deviceType, + cl_uint deviceGpuList, + cl_context& context, + bool printclInfo ); + +/* + * \brief OpenCL memory buffer creation + */ +int createOpenCLMemoryBuffer( + cl_context& context, + const size_t bufferSizeBytes, + const cl_uint numBuffers, + cl_mem buffer[], + cl_mem_flags accessibility + ); + +/* + * \brief OpenCL command queue creation (ripped from AMD stream sample code ) + * Create Command Queue + * Create OpenCL memory buffer objects + */ +void createOpenCLCommandQueue( cl_context& context, + cl_uint commandQueueFlags, + cl_command_queue& commandQueue, + std::vector< cl_device_id > devices, + const size_t bufferSizeBytesIn, + const cl_uint numBuffersIn, + cl_mem clMemBufferIn[], + const size_t bufferSizeBytesOut, + const cl_uint numBuffersOut, + cl_mem clMemBufferOut[] ); + +/* + * \brief release OpenCL memory buffer + */ +int releaseOpenCLMemBuffer( const cl_uint numBuffers, cl_mem buffer[] ); + +std::string prettyPrintclFFTStatus( const cl_int& status ); + +// This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition. +// If an error occurs, we throw. 
+// Note: std::runtime_error does not take unicode strings as input, so only strings supported +inline cl_int OpenCL_V_Throw ( cl_int res, const std::string& msg, size_t lineno ) +{ + switch( res ) + { + case CL_SUCCESS: /**< No error */ + break; + default: + { + std::stringstream tmp; + tmp << "OPENCL_V_THROWERROR< "; + tmp << prettyPrintclFFTStatus( res ); + tmp << " > ("; + tmp << lineno; + tmp << "): "; + tmp << msg; + std::string errorm (tmp.str()); + std::cout << errorm<< std::endl; + throw std::runtime_error( errorm ); + } + } + + return res; +} +#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw (_status, _message, __LINE__) + +/* + * \brief Release OpenCL resources (Context, Memory etc.) (ripped from AMD stream sample code ) + */ +int cleanupCL( cl_context* context, cl_command_queue* commandQueue, const cl_uint numBuffersIn, cl_mem inputBuffer[], const cl_uint numBuffersOut, cl_mem outputBuffer[], cl_event* outEvent ); + +#endif diff --git a/RTCP/GPUProc/clAmdFft/samples/clMemcpy.cpp b/RTCP/GPUProc/clAmdFft/samples/clMemcpy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a1c63fa57807ebffb40a785d18b8caac7010d73b --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/clMemcpy.cpp @@ -0,0 +1,998 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. +//////////////////////////////////////////// + +// clAmdFft.clMemcpy.cpp : OpenCL memory copy kernel generator +// +// +// +//////////////////////////////////////////////////////////////////////////////// + +// TODO: Add 2d/tiled memory copies. 
// Interface describing an OpenCL scalar or vector element type used by the
// kernel generator.
class clDataType{
public:
    // Objects are handled through clDataType pointers, so destruction through
    // the base must be well defined.
    virtual ~clDataType() {}

    virtual bool setSize(size_t size) = 0;  // set vector width; false if rejected
    virtual size_t getSize() = 0;           // size of the whole type in bytes
    virtual size_t getTypeSize() = 0;       // size of the base type in bytes
    virtual std::string getName() = 0;      // OpenCL C type name
};

// Single-precision type: "float" when the width is 1, otherwise "float<N>".
// A default-constructed object has width 2.
class clFloat:public clDataType{
public:

    clFloat()
    {
        clSize = 2;
    }

    // Bytes occupied by one value of the type (width * sizeof(float)).
    size_t getSize()
    {
        return clSize * sizeof(float);
    }

    // Bytes occupied by one component.
    size_t getTypeSize()
    {
        return sizeof(float);
    }

    // OpenCL C spelling of the type, e.g. "float", "float2", "float16".
    std::string getName()
    {
        std::stringstream name;
        name << "float";
        if(clSize > 1)
        {
            name << clSize;
        }

        return name.str();
    }

    // Change the vector width.  Only the powers of two 1, 2, 4, 8 and 16 are
    // accepted; anything else (including 0, which would give a zero-byte
    // type) is rejected and leaves the current width untouched.
    bool setSize(size_t size)
    {
        const bool isPowerOfTwo = (size != 0) && ((size & (size - 1)) == 0);
        if(!isPowerOfTwo || size > 16)
        {
            return false;
        }

        clSize = size;
        return true;
    }

private:
    size_t clSize;  // number of components in the vector type
};
kernel generator has its own special set of paramters + +private: + size_t clSize; +}; + +// This is a helper function to query a device for it's caps and check whether a certain user supplied cap is present +// stolen from the clAmdRuntime library +bool checkDevExt( std::string cap, std::vector< cl_device_id >& devices ) +{ + for( size_t d = 0; d < devices.size( ); ++d) + { + size_t deviceExtSize = 0; + ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, 0, NULL, &deviceExtSize ), + "Getting CL_DEVICE_EXTENSIONS Platform Info string size ( ::clGetDeviceInfo() )"; + + std::vector< char > szDeviceExt( deviceExtSize ); + ::clGetDeviceInfo( devices[ d ], CL_DEVICE_EXTENSIONS, deviceExtSize, &szDeviceExt[ 0 ], NULL ), + "Getting CL_DEVICE_EXTENSIONS Platform Info string ( ::clGetDeviceInfo() )"; + + std::string strDeviceExt = &szDeviceExt[ 0 ]; + + if( strDeviceExt.find( cap.c_str( ), 0 ) == std::string::npos ) + return 0; + } + + return true; +} + +#define INDENT " " + +// memcpy kernel generator, very simple +// + void GenerateMemcpyKernel (stringstream &ssn, const int registerCount, const int dumbyRegisterCount, const int workGroupSize , clDataType * clType, const bool useBarrier, int ldsPasses, const int dataItemCount, const int writeOnly, const int readOnly, const int memcpyOnly, const bool supportDoublePrecision) +{ +// kernel generator - dumb + + //std::stringstream ssn (std::stringstream::out); + static const bool first_choice = true; + + ssn << "//------------------------------\n" + "// !!!!!NULL Memcopy KERNEL!!!!\n\n"; + + // include double precision support + + if(supportDoublePrecision) + { + ssn<< "\n#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n\n"; + } + + // set the workgroup size to our specification, this will effect the number of wavefronts used + ssn << "__attribute__((reqd_work_group_size(" << workGroupSize << ",1,1)))\n" + << "__kernel void\n" + << "memcpy" << "(\n"; + + +// basically do inplace memcopy unless memcpyOnly is true, then do out 
of place + + if(!memcpyOnly) + { + ssn << INDENT "__global " << clType->getName() << " *gcomplx\n"; + ssn << ")\n{\n"; + } + else + { + ssn << INDENT "__global const " << clType->getName() << " *in,\n"; + ssn << INDENT "__global " << clType->getName() << " *out\n"; + ssn << ")\n{\n"; + + // a strict memcopy kernel does not require much, just code it here and return + + ssn << INDENT "int gid = get_global_id(0);\n"; + ssn << INDENT "out[gid] = in[gid];\n"; + ssn << INDENT "return;"; + ssn << "\n}\n"; + + return; + } + + + // create registers for kernel to use for memcopies + ssn << "\n" << clType->getName() << " R0"; + for(int i = 1; i < registerCount + dumbyRegisterCount; i++) + { + ssn << ",R" << i; + } + + ssn << ";\n"; + + // identifiers for local work item and global group id + ssn << "\nuint me = get_local_id(0);"; + ssn << "\nuint batch = get_group_id(0);"; + + ssn << "\nglobal "; + + // if read only kernel use const to disable read caching + if(writeOnly) + { + ssn << "const "; + } + + ssn << clType->getName() << "* gpc = gcomplx + me * " << registerCount << " + batch * " << registerCount * workGroupSize << ";"; + + if(ldsPasses > 0) + { + // allocate LDS + ssn << "\n__local " << clType->getName() << " ldsBuff[" << registerCount * workGroupSize << "];"; + ssn << "\n__local " << clType->getName() << "* lds = ldsBuff + me * " << registerCount << ";"; + } + + ssn << "\n"; + + // If write only kernel, don't read back regs to global memory + if(writeOnly || !readOnly) + { + // copy data from Global Memory to regs + for(int i = 0; i < registerCount; i++) + { + ssn << "\nR" << i << "= gpc[" << i << "];"; + } + } + + ssn << "\n"; + + // make number of LDS passes specified, copy from regs to lds back to regs + for(int j = 0; j < ldsPasses; j++) + { + + // copy data from regs to LDS + for(int i = 0; i < registerCount; i++) + { + + ssn << "\nlds[" << i << "] = R" << i << ";"; + } + + ssn << "\n"; + + // insert memory barrier + if(useBarrier == true) + { + ssn << 
// http://cottonvibes.blogspot.com/2011/01/dynamically-allocate-aligned-memory.html
//
// Allocates `size` bytes whose start address is a multiple of `alignment`.
//
// size      - number of usable bytes requested.
// alignment - must be a power of two in the range 1 .. 2^15 (0x8000).
//
// Returns the aligned pointer, or NULL when the alignment is invalid, when the
// padded size would overflow size_t, or when malloc() fails. The result must
// be released with aligned_free(), never with free().
//
// Layout: the block obtained from malloc() is over-allocated by
// (alignment - 1) + 2 bytes. The two bytes immediately below the returned
// pointer store the distance back to the start of the malloc() block, which is
// why the alignment is capped at 2^15: the largest possible distance,
// 2 + (alignment - 1), must fit in a uint16_t.
void* aligned_malloc(size_t size, size_t alignment) {
    // Validate at run time; an assert would vanish in NDEBUG builds and an
    // alignment of 0 would otherwise wrap the mask to SIZE_MAX.
    if (alignment == 0 || alignment > 0x8000 || (alignment & (alignment - 1)) != 0)
        return NULL;

    const size_t mask     = alignment - 1;
    const size_t overhead = mask + 2;   // worst-case padding + 2-byte header

    // Guard against size + overhead wrapping around.
    if (size > static_cast<size_t>(-1) - overhead)
        return NULL;

    void* raw = malloc(size + overhead);
    if (raw == NULL)
        return NULL;

    const uintptr_t r = reinterpret_cast<uintptr_t>(raw);
    const uintptr_t o = (r + 2 + mask) & ~static_cast<uintptr_t>(mask);

    // Remember how far the aligned pointer is from the real allocation.
    reinterpret_cast<uint16_t*>(o)[-1] = static_cast<uint16_t>(o - r);
    return reinterpret_cast<void*>(o);
}

// Releases memory obtained from aligned_malloc(). Passing NULL is a no-op.
void aligned_free(void* p) {
    if (p == NULL)
        return;

    // The header just below `p` holds the offset back to the malloc() block.
    const uint16_t offset = reinterpret_cast<uint16_t*>(p)[-1];
    free(reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(p) - offset));
}
program + cl_kernel kernel; // compute kernel + + cl_mem input; // device memory used for the input array + cl_mem output; // device memory used for the output array for strict memcopy kernel + + cl_device_type deviceType = CL_DEVICE_TYPE_GPU; // make the GPU the default device type + + int workgroupSize = 0; // workgroup size / number of work items per wavefront + int registerCount = 16; // registers allocated in kernels for memcopy operations + int dumbyRegisterCount = 16; // registers allocated, but not used for memcopies + int dataItemCount = 0; // total number of items (type float,float2,4) to copy to/from OpenCL device + int dataItemCountEnd = 0; // total number of items (type float,float2,4) to copy to/from OpenCL device + int ldsPasses = 1; // number of 'passes' copying data to/from LDS + clDataType * clType; // default float type to use + bool useBarrier = true; // include memory barrier in kernels + bool memcpyOnly = false; // if true, creates strict memcopy kernels, not registers allocated (in CL code) + bool writeOnly = false; // only perform write operations + bool readOnly = false; // only perform read operations. + bool bDisableOptimization = false; // disable OpenCL compiler optimizations if true + bool bDoublePrecision = false; + bool bZeroMemcopy = false; // if true, host memory is used by GPU + + cl_ulong start = 0; // profiling start and end times + cl_ulong end = 0; + + clFloat lFloat; + clDouble lDouble; + clType = &lFloat; // float is default + + try + { + // Declare the supported options. 
+ po::options_description desc( "clMemcpy client command line options" ); + desc.add_options() + ( "help,h", "produces this help message" ) + ( "version,v", "Print out build date/version" ) + ( "gpu,g", "Force instantiation of an OpenCL GPU device" ) + ( "cpu,c", "Force instantiation of an OpenCL CPU device" ) + ( "float,f", po::value< int >(), "Float type to use in kernels, 1,2,4,8,16 (default: float2)" ) + ( "double,d", po::value< int >(), "Use double type to use in kernels, 1,2,4 (default: double 1)" ) + ( "regs,r", po::value< int >( ®isterCount )->default_value( 16 ), "Specify number of registers to use in kernels (default: 16)" ) + ( "dumbyRegs,q", po::value< int >( &dumbyRegisterCount )->default_value( 0 ), "Specify number 'dumby registers' to allocate in kernels" ) + ( "memcpyOnly,m", "Generate strict memcopy kernel (default: false)" ) + ( "itemCount,i", po::value< int >( &dataItemCount )->default_value( 0 ), "Number of items to transfer (default: max allocatable)" ) + ( "itemCountEnd,j", po::value< int >( &dataItemCountEnd )->default_value( 0 ), "End of item count, start at i go to j in powers of 2." 
) + ( "ldsPasses,l", po::value< int >( &ldsPasses )->default_value( 1 ), "Number of 'passes' using LDS (default: 1, 0 = no LDS used)" ) + ( "barrier,b", po::value< bool >( &useBarrier )->default_value( true ), "Include memory barrier in kernel" ) + ( "writeOnly,x", "Write only kernels (default: false)" ) + ( "readOnly,y", "Read only kernels (default: false" ) + ( "disableOptimization,n", "Disable OpenCL compiler optimizations (default: false" ) + ( "zeroMemcopy,z", "Use zero memcopy kernels, only valid on APUs (default 0)" ) + ( "workgroupSize,w", po::value< int >( &workgroupSize )->default_value( 64 ), "Workgroup size (default 64)" ) + ; + + po::variables_map vm; + po::store( po::parse_command_line( argc, argv, desc ), vm ); + po::notify( vm ); + + stringstream str; + + if( vm.count( "version" ) ) + { + str << "clMemcopy version: " << __DATE__ << " " << __TIME__ <<std::endl; + std::cout << str.str(); + str.str() = ""; + return 0; + } + + if( vm.count( "help" ) ) + { + // This needs to be 'cout' as program-options does not support wcout yet + std::cout << desc << std::endl; + return 0; + } + + size_t mutex = ((vm.count( "gpu" ) > 0) ? 1 : 0) + | ((vm.count( "cpu" ) > 0) ? 2 : 0); + if ((mutex & (mutex-1)) != 0) { + str << "You have selected mutually-exclusive OpenCL device options:" << std::endl; + if (vm.count ( "gpu" ) > 0) str << " gpu,g Force instantiation of an OpenCL GPU device" << std::endl; + if (vm.count ( "cpu" ) > 0) str << " cpu,c Force instantiation of an OpenCL CPU device" << std::endl; + { + std::cout << str.str(); + return 1; + } + } + + mutex = ((vm.count( "writeOnly" ) > 0) ? 1 : 0) + | ((vm.count( "readOnly" ) > 0) ? 
2 : 0); + if ((mutex & (mutex-1)) != 0) { + str << "You have selected mutually-exclusive OpenCL device options:" << std::endl; + if (vm.count ( "writeOnly" ) > 0) str << " writeOnly,x Generate write only kernels" << std::endl; + if (vm.count ( "readOnly" ) > 0) str << " readOnly,y Generate read only kernels" << std::endl; + { + std::cout << str.str(); + return 1; + } + } + + if( vm.count( "gpu" ) ) + { + deviceType = CL_DEVICE_TYPE_GPU; + } + + if( vm.count( "cpu" ) ) + { + deviceType = CL_DEVICE_TYPE_CPU; + } + + if( vm.count( "writeOnly" ) ) + { + writeOnly = true; + } + + if( vm.count( "readOnly" ) ) + { + readOnly = true; + } + + if( vm.count( "zeroMemcopy" ) ) + { + bZeroMemcopy = true; + } + + int typeCount = 0; + + if( vm.count( "float" ) ) + { + if(!clType->setSize(vm["float"].as<int>())) + { + std::cout << "Float (float,-f) type must be 1,2,4,8, or 16."; + return 1; + } + typeCount ++; + } + + if( vm.count( "double" ) ) + { + clType = &lDouble; + if(!clType->setSize(vm["double"].as<int>())) + { + std::cout << "Double (double,-d) type must be 1, or 2."; + return 1; + } + bDoublePrecision = true; + typeCount ++; + } + + if(typeCount > 1) + { + std::cout << "Only one register type may be specified (Float,Double)."; + return 1; + } + + if( vm.count( "memcpyOnly" ) ) + { + memcpyOnly = true; + registerCount = 1; + } + + if( vm.count( "disableOptimization" ) ) + { + bDisableOptimization = true; + } + + if(workgroupSize < 1) + { + printf("Error: workgroup size can not be 0"); + return 1; + } + + // if the register count is < 1, it's a pure memcpy kernel + if(registerCount < 1) + { + registerCount = 1; + memcpyOnly = true; + } + + } + catch( std::exception& e ) + { + std::cout << "clMemcopy error condition reported:" << std::endl << e.what() << std::endl; + return 1; + } + + // enumerate platforms to see if anything is available. 
+ // + err=clGetPlatformIDs(1, &platform, &platforms); + if (err != CL_SUCCESS) + { + printf("Error: Failed to get a platform.!\n"); + return EXIT_FAILURE; + } + + // Connect to a compute device + // + err = clGetDeviceIDs(platform, deviceType, 1, &device_id, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to create a device group!\n"); + return EXIT_FAILURE; + } + + // Create a compute context + // + context = clCreateContext(0, 1, &device_id, NULL, NULL, &err); + if (!context) + { + printf("Error: Failed to create a compute context!\n"); + return EXIT_FAILURE; + } + + // Create a command commands + // + commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &err); + if (!commands) + { + printf("Error: Failed to create a command commands!\n"); + return EXIT_FAILURE; + } + + // find how much global memory can safely be allocated + // + cl_ulong maxMemAlloc = 0; + err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE,sizeof(cl_ulong), &maxMemAlloc, NULL); + + if (err != CL_SUCCESS) + { + printf("Error: Failed to read MAX_MEM_ALLOC_SIZE from device!\n"); + return EXIT_FAILURE; + } + + // find how much local memory can safely be allocated + // + cl_ulong maxLocalMemAlloc = 0; + err = clGetDeviceInfo(device_id, CL_DEVICE_LOCAL_MEM_SIZE,sizeof(cl_ulong), &maxLocalMemAlloc, NULL); + + if (err != CL_SUCCESS) + { + printf("Error: Failed to read CL_DEVICE_LOCAL_MEM_SIZE from device!\n"); + return EXIT_FAILURE; + } + + // check if double precision is supported + // If the user specifies double precision, check that the device supports double precision first + if( bDoublePrecision ) + { + std::vector< cl_device_id > dev; + dev.push_back(device_id); + + bool retAmdFp64 = checkDevExt( "cl_amd_fp64", dev ); + if( retAmdFp64 != true ) + { + // If AMD's extention is not supported, check for Khronos extention + bool retKhrFp64 = checkDevExt( "cl_khr_fp64", dev ); + if( retKhrFp64 != true ) + { + printf("Error: Device %d does not support 
double precission\n", device_id); + return EXIT_FAILURE; + } + } + } + + + do + { + + // generate a kernel + // + stringstream kernelSource; + + GenerateMemcpyKernel(kernelSource, registerCount, dumbyRegisterCount, workgroupSize, clType, useBarrier, ldsPasses, dataItemCount, writeOnly, readOnly, memcpyOnly, bDoublePrecision); + + if( !dataItemCountEnd ) // + { + printf("\n%s\n", kernelSource.str().c_str()); + } + // calculate how many data items we want to move, float1,2,4 + // + if(dataItemCount == 0) + { + if( memcpyOnly ) + { + maxMemAlloc /= 2; // need two buffers + } + dataItemCount = (int)(maxMemAlloc / (clType->getSize())); + dataItemCount /= registerCount * workgroupSize; + dataItemCount *= registerCount * workgroupSize; + } + + + // Fill our data set with random float values + // + + void* data = aligned_malloc(clType->getSize() * dataItemCount, 256); // original data set given to device + if(data == NULL) + { + printf("Error: Failed allcating host data buffer!\n"); + return EXIT_FAILURE; + } + + srand ( (unsigned int) time(NULL) ); + for(int i = 0; i < dataItemCount * clType->getSize(); i++) + { + *((char *)data + i) = rand() / (char)RAND_MAX; + } + + // Create the compute program from the source buffer + // + std::string stringKern = kernelSource.str(); + const char *charKern = stringKern.c_str(); + program = clCreateProgramWithSource(context, 1, (const char **) &charKern, NULL, &err); + if (!program) + { + printf("Error: Failed to create compute program!\n"); + return EXIT_FAILURE; + } + + // Build the program executable + // + if(bDisableOptimization) + { + err = clBuildProgram(program, 0, NULL, "-g -cl-opt-disable", NULL, NULL); + } + else + { + err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); + } + + if (err != CL_SUCCESS) + { + size_t len; + char buffer[2048]; + + printf("Error: Failed to build program executable!\n"); + clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); + printf("%s\n", buffer); 
+ exit(1); + } + + // Create the compute kernel in the program we wish to run + // + kernel = clCreateKernel(program, "memcpy", &err); + if (!kernel || err != CL_SUCCESS) + { + printf("Error: Failed to create compute kernel!\n"); + exit(1); + } + /* + // Discover and load the timer module if present + void* timerLibHandle = LoadSharedLibrary( "lib", "clAmdFft.StatTimer", false ); + if( timerLibHandle == NULL ) + { + terr << _T( "Could not find the external timing library; timings disabled" ) << std::endl; + } + + + // Timer module discovered and loaded successfully + // Initialize function pointers to call into the shared module + PFGETSTATTIMER get_timer = reinterpret_cast< PFGETSTATTIMER > ( LoadFunctionAddr( timerLibHandle, "getStatTimer" ) ); + + // Create and initialize our timer class, if the external timer shared library loaded + baseStatTimer* timer = NULL; + */ + size_t writeTimer,readTimer,executeTimer = 0; + StatisticalTimer &timer = StatisticalTimer::getInstance(); + + + + // timer->setNormalize( true ); + timer.Reserve( 3, 1 ); + + writeTimer = timer.getUniqueID( "write", 0 ); + readTimer = timer.getUniqueID( "read", 1 ); + executeTimer = timer.getUniqueID( "execute", 2); + + + // Create the input and output arrays in device memory for our calculation + // + + cl_mem_flags memFlags = CL_MEM_READ_ONLY; + void *hostPtr = NULL; + void *hostPtrOut = NULL; // use to map point to output buffer for memcopy only kernels + + // this option will only work on APUs same physical memory is used by host and device + if(bZeroMemcopy) + { + memFlags |= CL_MEM_ALLOC_HOST_PTR; + // memFlags |= CL_MEM_USE_PERSISTENT_MEM_AMD; + } + + input = clCreateBuffer(context, memFlags, clType->getSize() * dataItemCount, NULL, NULL); + + if (!input) + { + printf("Error: Failed to allocate device memory!!\n"); + exit(1); + } + + if(memcpyOnly) + { + output = clCreateBuffer(context, memFlags, clType->getSize() * dataItemCount, NULL, NULL); + if (!output) + { + printf("Error: Failed to 
allocate device memory!\n"); + exit(1); + } + } + + + + if( bZeroMemcopy ) + { + // err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, NULL); //test + // if( err != CL_SUCCESS ) + // { + // printf("Error: Failed to copy host buffer to cl buffer (zero memcopy)!\n"); + // return EXIT_FAILURE; + // } + + hostPtr = clEnqueueMapBuffer(commands, input, CL_TRUE, CL_MAP_WRITE, 0, clType->getSize() * dataItemCount, 0, NULL, NULL, &err); + if( err != CL_SUCCESS ) + { + printf("Error: Failed to map host pointer to zero memcopy buffer!\n"); + return EXIT_FAILURE; + } + + if(memcpyOnly) + { + hostPtrOut = clEnqueueMapBuffer(commands, output, CL_TRUE, CL_MAP_WRITE, 0, clType->getSize() * dataItemCount, 0, NULL, NULL, &err); + if( err != CL_SUCCESS ) + { + printf("Error: Failed to map host pointer to zero memcopy buffer!\n"); + return EXIT_FAILURE; + } + } + + // start timing writing to buffer (device or zero mem copy) + timer.Start(writeTimer); + + memcpy( hostPtr, data, clType->getSize() * dataItemCount); + } + else + { + // start timing writing to buffer (device or zero mem copy) + timer.Start(writeTimer); + } + + // Write our data set into the input array in device memory + // + if( !bZeroMemcopy ) + { + cl_event eventKernelTiming; // for timing + + err = clEnqueueWriteBuffer(commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, &eventKernelTiming); + if (err != CL_SUCCESS) + { + printf("Error: Failed to write to source array!\n"); + exit(1); + } + clFinish(commands); + + + clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_START, + sizeof(start), &start, NULL); + + clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_END, + sizeof(end), &end, NULL); + + cl_ulong diff = end-start; + + if( !dataItemCountEnd) + { + printf("\nbuffer write GPU timer %lld",diff); + } + } + + timer.Stop(writeTimer); + + // Set the arguments to our compute kernel + // + + err = 0; + err = 
clSetKernelArg(kernel, 0, sizeof(cl_mem), &input); + + if(memcpyOnly) + { + err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); + } + + // err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &output); + // err |= clSetKernelArg(kernel, 2, sizeof(unsigned int), &count); + if (err != CL_SUCCESS) + { + printf("Error: Failed to set kernel arguments! %d\n", err); + exit(1); + } + + + // Get the maximum work group size for executing the kernel on the device + // + err = clGetKernelWorkGroupInfo(kernel, device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(local), &local, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to retrieve kernel work group info! %d\n", err); + exit(1); + } + + // Execute the kernel over the entire range of our 1d input data set + // using the maximum number of work group items for this device + // + if(!memcpyOnly) + { + global = dataItemCount / registerCount; + } + else + { + global = dataItemCount; + } + + if(workgroupSize < local && workgroupSize != 0) + { + local = workgroupSize; + } + if(workgroupSize > local) + { + printf("Error: Max supported workgroup size is %d, requested was %d", (unsigned int)local, workgroupSize); + exit(1); + } + + cl_event eventKernelTiming; // for timing + + timer.Start(executeTimer); // measure kernel execution time + + err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, &eventKernelTiming); + + // Wait for the command commands to get serviced before reading back results + // + + //clWaitForEvents(1, &eventGlobal); + clFinish(commands); + + timer.Stop(executeTimer); // end of kernel execution + + clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_START, + sizeof(start), &start, NULL); + + clGetEventProfilingInfo(eventKernelTiming, CL_PROFILING_COMMAND_END, + sizeof(end), &end, NULL); + + clReleaseEvent(eventKernelTiming); + + if (err) + { + printf("Error: Failed to execute kernel!\n"); + return EXIT_FAILURE; + } + + timer.Start(readTimer); // measure time to read 
back from memory + + // Read back the results from the device to verify the output + // + + if( !bZeroMemcopy ) + { + err = clEnqueueReadBuffer( commands, input, CL_TRUE, 0, clType->getSize() * dataItemCount, data, 0, NULL, NULL); + if (err != CL_SUCCESS) + { + printf("Error: Failed to read output array! %d\n", err); + exit(1); + } + } + + timer.Stop(readTimer); + + cl_ulong time = end - start; /* Convert nanoseconds to msecs */ + + // Calculate gflops + + cl_ulong dataTransferred = dataItemCount * clType->getSize(); + + int multiplier = 2; + if(readOnly || writeOnly) + { + multiplier = 1; + } + + int fftlen = (int)(local * registerCount * clType->getSize()/clType->getTypeSize()) / 2; + double gflops = (global/local) * 5 * fftlen * ( log( static_cast< double >( fftlen ) ) / log( 2.0 ) ) / time; + double MBps = (double)(multiplier * (double)(dataTransferred) / time); + + if( !dataItemCountEnd ) + { + printf("\nTicks= %ld\nTransfer= %ld bytes\nbandwidth= %lf GB/S", time , dataTransferred, MBps); + + if(!memcpyOnly) + { + printf("\nType = %s\nfftlen=%d\nGflops %lf\n",clType->getName().c_str(), fftlen, gflops); + } + } + // Shutdown and cleanup + // + + if(bZeroMemcopy) + { + err = clEnqueueUnmapMemObject(commands, input, hostPtr, 0, 0, 0); + if(memcpyOnly) + { + err = clEnqueueUnmapMemObject(commands, output, hostPtrOut, 0, 0, 0); + } + + if(err != CL_SUCCESS) + { + printf("Error: Failed to unmap memory objects!\n"); + return EXIT_FAILURE; + } + } + + clReleaseMemObject(input); + if( memcpyOnly ) + { + clReleaseMemObject(output); + } + + if( data ) + { + aligned_free( data ); + } + + printf("\n%10ld,\t%f,\t%f,\t%f,\t%f,",dataTransferred, timer.getMinimumTime(writeTimer) ,timer.getMinimumTime(executeTimer),timer.getMinimumTime(readTimer),timer.getMinimumTime(writeTimer) + timer.getMinimumTime(executeTimer) + timer.getMinimumTime(readTimer) ); + + clReleaseProgram(program); + clReleaseKernel(kernel); + + dataItemCount*= 2; +} while(dataItemCount <= dataItemCountEnd); + 
// Functor object to help with accumulating values in vectors.
//
// Intended for std::for_each: every element visited is added to `acc`, and the
// functor returned by std::for_each carries the final sum.
//
// The former std::unary_function base class was deprecated in C++11 and
// removed in C++17; the two typedefs it used to provide are declared here
// directly so code that inspects them keeps working.
template< typename T >
struct Accumulator
{
    typedef T    argument_type;
    typedef void result_type;

    // Running total; starts at zero.
    T acc;

    Accumulator( ): acc( 0 ) {}

    // Adds one value to the running total.
    void operator( )( T x ) { acc += x; }
};
normalize( true ) +{ +#if defined( _WIN32 ) + // OS call to get ticks per second2 + ::QueryPerformanceFrequency( reinterpret_cast<LARGE_INTEGER*>( &clkFrequency ) ); +#else + clkFrequency = 1000000; +#endif +} + +StatisticalTimer::~StatisticalTimer( ) +{} + +void +StatisticalTimer::Clear( ) +{ + labelID.clear( ); + clkStart.clear( ); + clkTicks.clear( ); +} + +void +StatisticalTimer::Reset( ) +{ + if( nEvents == 0 || nSamples == 0 ) + throw std::runtime_error( "StatisticalTimer::Reserve( ) was not called before Reset( )" ); + + clkStart.clear( ); + clkTicks.clear( ); + + clkStart.resize( nEvents ); + clkTicks.resize( nEvents ); + + for( unsigned int i = 0; i < nEvents; ++i ) + { + clkTicks.at( i ).reserve( nSamples ); + } + + return; +} + +// The caller can pre-allocate memory, to improve performance. +// nEvents is an approximate value for how many seperate events the caller will think +// they will need, and nSamples is a hint on how many samples we think we will take +// per event +void +StatisticalTimer::Reserve( unsigned int nEvents, unsigned int nSamples ) +{ + this->nEvents = std::max<unsigned int> (1, nEvents); + this->nSamples = std::max<unsigned int> (1, nSamples); + + Clear( ); + labelID.reserve( nEvents ); + + clkStart.resize( nEvents ); + clkTicks.resize( nEvents ); + + for( unsigned int i = 0; i < nEvents; ++i ) + { + clkTicks.at( i ).reserve( nSamples ); + } +} + +void +StatisticalTimer::setNormalize( bool norm ) +{ + normalize = norm; +} + +void +StatisticalTimer::Start( sTimerID id ) +{ +#if defined( _WIN32 ) + ::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &clkStart.at( id ) ) ); +#else + struct timeval s; + gettimeofday(&s, 0); + clkStart.at( id ) = (unsigned long long)s.tv_sec * 1000000 + (unsigned long long)s.tv_usec; +#endif +} + +void +StatisticalTimer::Stop( sTimerID id ) +{ + unsigned long long n; + +#if defined( _WIN32 ) + ::QueryPerformanceCounter( reinterpret_cast<LARGE_INTEGER*>( &n ) ); +#else + struct timeval s; + 
gettimeofday(&s, 0); + n = (unsigned long long)s.tv_sec * 1000000 + (unsigned long long)s.tv_usec; +#endif + + n -= clkStart.at( id ); + clkStart.at( id ) = 0; + AddSample( id, n ); +} + +void +StatisticalTimer::AddSample( const sTimerID id, const unsigned long long n ) +{ + clkTicks.at( id ).push_back( n ); +} + +// This function's purpose is to provide a mapping from a 'friendly' human readable text string +// to an index into internal data structures. +StatisticalTimer::sTimerID +StatisticalTimer::getUniqueID( const std::string& label, unsigned int groupID ) +{ + // I expect labelID will hardly ever grow beyond 30, so it's not of any use + // to keep this sorted and do a binary search + + labelPair sItem = std::make_pair( label, groupID ); + + stringVector::iterator iter; + iter = std::find( labelID.begin(), labelID.end(), sItem ); + + if( iter != labelID.end( ) ) + return std::distance( labelID.begin( ), iter ); + + labelID.push_back( sItem ); + + return labelID.size( ) - 1; + +} + +double +StatisticalTimer::getMean( sTimerID id ) const +{ + if( clkTicks.empty( ) ) + return 0; + + size_t N = clkTicks.at( id ).size( ); + + Accumulator<unsigned long long> sum = std::for_each( clkTicks.at( id ).begin(), clkTicks.at( id ).end(), Accumulator<unsigned long long>() ); + + return static_cast<double>( sum.acc ) / N; +} + +double +StatisticalTimer::getVariance( sTimerID id ) const +{ + if( clkTicks.empty( ) ) + return 0; + + double mean = getMean( id ); + + size_t N = clkTicks.at( id ).size( ); + double sum = 0; + + for( unsigned int i = 0; i < N; ++i ) + { + double diff = clkTicks.at( id ).at( i ) - mean; + diff *= diff; + sum += diff; + } + + return sum / N; +} + +double +StatisticalTimer::getStdDev( sTimerID id ) const +{ + double variance = getVariance( id ); + + return sqrt( variance ); +} + +double +StatisticalTimer::getAverageTime( sTimerID id ) const +{ + if( normalize ) + return getMean( id ) / clkFrequency; + else + return getMean( id ); +} + +double 
+StatisticalTimer::getMinimumTime( sTimerID id ) const +{ + clkVector::const_iterator iter = std::min_element( clkTicks.at( id ).begin( ), clkTicks.at( id ).end( ) ); + + if( iter != clkTicks.at( id ).end( ) ) + { + if( normalize ) + return static_cast<double>( *iter ) / clkFrequency; + else + return static_cast<double>( *iter ); + } + else + return 0; +} + +unsigned int +StatisticalTimer::pruneOutliers( sTimerID id , double multiple ) +{ + if( clkTicks.empty( ) ) + return 0; + + double mean = getMean( id ); + double stdDev = getStdDev( id ); + + clkVector& clks = clkTicks.at( id ); + + // Look on p. 379, "The C++ Standard Library" + // std::remove_if does not actually erase, it only copies elements, it returns new 'logical' end + clkVector::iterator newEnd = std::remove_if( clks.begin( ), clks.end( ), PruneRange< double,unsigned long long >( mean, multiple*stdDev ) ); + + clkVector::difference_type dist = std::distance( newEnd, clks.end( ) ); + + if( dist != 0 ) + clks.erase( newEnd, clks.end( ) ); + + assert( dist < std::numeric_limits< unsigned int >::max( ) ); + + return static_cast< unsigned int >( dist ); +} + +unsigned int +StatisticalTimer::pruneOutliers( double multiple ) +{ + unsigned int tCount = 0; + + for( unsigned int l = 0; l < labelID.size( ); ++l ) + { + unsigned int lCount = pruneOutliers( l , multiple ); + std::clog << "\tStatisticalTimer:: Pruning " << lCount << " samples from " << labelID[l].first << std::endl; + tCount += lCount; + } + + return tCount; +} + +// Defining an output print operator +std::ostream& +operator<<( std::ostream& os, const StatisticalTimer& st ) +{ + if( st.clkTicks.empty( ) ) + return os; + + std::ios::fmtflags bckup = os.flags( ); + + for( unsigned int l = 0; l < st.labelID.size( ); ++l ) + { + unsigned long long min = 0; + StatisticalTimer::clkVector::const_iterator iter = std::min_element( st.clkTicks.at( l ).begin( ), st.clkTicks.at( l ).end( ) ); + + if( iter != st.clkTicks.at( l ).end( ) ) + min = *iter; + + os << 
st.labelID[l].first << ", " << st.labelID[l].second << std::fixed << std::endl; + os << "Min:," << min << std::endl; + os << "Mean:," << st.getMean( l ) << std::endl; + os << "StdDev:," << st.getStdDev( l ) << std::endl; + os << "AvgTime:," << st.getAverageTime( l ) << std::endl; + os << "MinTime:," << st.getMinimumTime( l ) << std::endl; + + for( unsigned int t = 0; t < st.clkTicks[l].size( ); ++t ) + { + os << st.clkTicks[l][t]<< ","; + } + os << "\n" << std::endl; + + } + + os.flags( bckup ); + + return os; +} diff --git a/RTCP/GPUProc/clAmdFft/samples/statisticalTimer.h b/RTCP/GPUProc/clAmdFft/samples/statisticalTimer.h new file mode 100644 index 0000000000000000000000000000000000000000..f7e38c0fac3d240aed5025bbefc8da76fee3db26 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/statisticalTimer.h @@ -0,0 +1,157 @@ +//////////////////////////////////////////// +// Copyright (C) 2010,2011 Advanced Micro Devices, Inc. All Rights Reserved. +//////////////////////////////////////////// + +#pragma once +#ifndef _STATISTICALTIMER_H_ +#define _STATISTICALTIMER_H_ +#include <iosfwd> +#include <vector> +#include <algorithm> + +/** + * \file clAmdFft.StatisticalTimer.h + * \brief A timer class that provides a cross platform timer for use + * in timing code progress with a high degree of accuracy. + * This class is implemented entirely in the header, to facilitate inclusion into multiple + * projects without needing to compile an object file for each project. + */ + +/** + * \class StatisticalTimer + * \brief Counter that provides a fairly accurate timing mechanism for both + * windows and linux. This timer is used extensively in all the samples. + */ + +class StatisticalTimer +{ + // Private typedefs + typedef std::vector< unsigned long long > clkVector; + typedef std::pair< std::string, unsigned int > labelPair; + typedef std::vector< labelPair > stringVector; + + // In order to calculate statistics <std. 
dev.>, we need to keep a history of our timings + stringVector labelID; + clkVector clkStart; + std::vector< clkVector > clkTicks; + + // How many clockticks in a second + unsigned long long clkFrequency; + + // Saved sizes for our vectors, used in Reset() to reallocate vectors + clkVector::size_type nEvents, nSamples; + + // This setting controls whether the Timer should convert samples into time by dividing by the + // clock frequency + bool normalize; + + /** + * \fn StatisticalTimer() + * \brief Constructor for StatisticalTimer that initializes the class + * This is private so that user code cannot create their own instantiation. Instead, you + * must go through getInstance( ) to get a reference to the class. + */ + StatisticalTimer( ); + + /** + * \fn ~StatisticalTimer() + * \brief Destructor for StatisticalTimer that cleans up the class + */ + ~StatisticalTimer( ); + + /** + * \fn StatisticalTimer(const StatisticalTimer& ) + * \brief Copy constructors do not make sense for a singleton, disallow copies + */ + StatisticalTimer( const StatisticalTimer& ); + + /** + * \fn operator=( const StatisticalTimer& ) + * \brief Assignment operator does not make sense for a singleton, disallow assignments + */ + StatisticalTimer& operator=( const StatisticalTimer& ); + + friend std::ostream& operator<<( std::ostream& os, const StatisticalTimer& s ); + +public: + // Public typedefs + typedef stringVector::difference_type sTimerID; + + /** + * \fn getInstance() + * \brief This returns a reference to the singleton timer. Guarantees only 1 timer class is ever + * instantiated within a compilable executable. 
+ */ + static StatisticalTimer& getInstance( ); + + /** + * \fn void Start( sTimerID id ) + * \brief Start the timer + * \sa Stop(), Reset() + */ + void Start( sTimerID id ); + + /** + * \fn void Stop( sTimerID id ) + * \brief Stop the timer + * \sa Start(), Reset() + */ + void Stop( sTimerID id ); + + /** + * \fn void AddSample( const sTimerID id, const unsigned long long n ) + * \brief Explicitly add a timing sample into the class + */ + void AddSample( const sTimerID id, const unsigned long long n ); + + /** + * \fn void Clear(void) + * \brief Clear the timer; NOTE(review): the body is not visible in this header, confirm how it differs from Reset() + * \sa Start(), Stop(), Reset() + */ + void Clear( ); + + /** + * \fn void Reset(void) + * \brief Reset the timer to 0 + * \sa Start(), Stop() + */ + void Reset( ); + + void Reserve( unsigned int nEvents, unsigned int nSamples ); + + sTimerID getUniqueID( const std::string& label, unsigned int groupID ); + + // Presumably sets the 'normalize' flag (whether samples are converted into time by dividing by the clock frequency); confirm against the definition + void setNormalize( bool norm ); + + // Calculate the average/mean of data for a given event + double getMean( sTimerID id ) const; + + // Calculate the variance of data for a given event + // Variance - average of the squared differences between data points and the mean + double getVariance( sTimerID id ) const; + + // Sqrt of variance, also in units of the original data + double getStdDev( sTimerID id ) const; + + /** + * \fn double getAverageTime(sTimerID id) const + * \return Return the arithmetic mean of all the samples that have been saved + */ + double getAverageTime( sTimerID id ) const; + + /** + * \fn double getMinimumTime(sTimerID id) const + * \return Return the smallest of all the samples that have been saved (divided by the clock frequency when normalizing), or 0 if there are none + */ + double getMinimumTime( sTimerID id ) const; + + // Using the stdDev of the entire population (of an id), eliminate those samples that fall + // outside some specified multiple of the stdDev. This assumes that the population + // forms a Gaussian curve. 
+ unsigned int pruneOutliers( double multiple ); + unsigned int pruneOutliers( sTimerID id , double multiple ); +}; + +#endif // _STATISTICALTIMER_H_ diff --git a/RTCP/GPUProc/clAmdFft/samples/stdafx.cpp b/RTCP/GPUProc/clAmdFft/samples/stdafx.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a4069dfcc0f47908ae7fb5fbfbeac3507e383a49 --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/stdafx.cpp @@ -0,0 +1,12 @@ +//////////////////////////////////////////// +// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. +//////////////////////////////////////////// + +// stdafx.cpp : source file that includes just the standard includes +// clAmdFft.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/RTCP/GPUProc/clAmdFft/samples/stdafx.h b/RTCP/GPUProc/clAmdFft/samples/stdafx.h new file mode 100644 index 0000000000000000000000000000000000000000..4887dae7f60052252150a02518cf0c65c8318e7a --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/stdafx.h @@ -0,0 +1,27 @@ +//////////////////////////////////////////// +// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. 
+//////////////////////////////////////////// + +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#pragma once + +#include "targetver.h" + +#include <iostream> +#include <stdexcept> +#include <iomanip> +#include <complex> +#include <valarray> +#include <stdarg.h> +#if defined( _WIN32 ) + #define NOMINMAX + #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers + + #include <tchar.h> + #include <windows.h> +#endif + diff --git a/RTCP/GPUProc/clAmdFft/samples/targetver.h b/RTCP/GPUProc/clAmdFft/samples/targetver.h new file mode 100644 index 0000000000000000000000000000000000000000..bf68fd6c48ba9919933b764c4db9119492f5f45c --- /dev/null +++ b/RTCP/GPUProc/clAmdFft/samples/targetver.h @@ -0,0 +1,14 @@ +//////////////////////////////////////////// +// Copyright (C) 2010 Advanced Micro Devices, Inc. All Rights Reserved. +//////////////////////////////////////////// + +#pragma once + +// Including SDKDDKVer.h defines the highest available Windows platform. + +// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and +// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. 
+ +#if defined( _WIN32 ) + #include <SDKDDKVer.h> +#endif diff --git a/RTCP/GPUProc/src/Align.h b/RTCP/GPUProc/src/Align.h new file mode 100644 index 0000000000000000000000000000000000000000..531bc0c8f80b9fab27f19460058c699d0da8a28c --- /dev/null +++ b/RTCP/GPUProc/src/Align.h @@ -0,0 +1,20 @@ +#if !defined ALIGN_H +#define ALIGN_H +/* powerOfTwo(n): true when n has exactly one bit set. NOTE(review): n == 0 also passes this test, so 0 is reported as a power of two; confirm callers never rely on that. */ +template <typename T> inline static bool powerOfTwo(T n) +{ + return (n | (n - 1)) == 2 * n - 1; +} +/* align(value, alignment): rounds value up to a multiple of alignment (divide-then-multiply form; assumes a non-negative integral value). When __GNUC__ is defined, an alignment that is a compile-time-constant power of two uses the bit-mask form instead. */ +/* NOTE(review): alignment == 0 divides by zero in the generic path; in the mask path a T wider than size_t would have its high bits cleared by the size_t mask; size_t is used although nothing is included here to declare it. Confirm each. */ +template <typename T> inline static T align(T value, size_t alignment) +{ +#if defined __GNUC__ + if (__builtin_constant_p(alignment) && powerOfTwo(alignment)) + return (value + alignment - 1) & ~(alignment - 1); + else +#endif + return (value + alignment - 1) / alignment * alignment; +} + +#endif diff --git a/RTCP/GPUProc/src/BandPass.cc b/RTCP/GPUProc/src/BandPass.cc new file mode 100644 index 0000000000000000000000000000000000000000..7bc74df48c65f75368fd401b8799ab6886d85721 --- /dev/null +++ b/RTCP/GPUProc/src/BandPass.cc @@ -0,0 +1,2141 @@ +#include "lofar_config.h" + +#include <BandPass.h> + +#if defined HAVE_FFTW3 +#include <fftw3.h> +#elif defined HAVE_FFTW2 +#include <fftw.h> +#else +#error Should have FFTW3 or FFTW2 installed +#endif + +#define STATION_FILTER_LENGTH 16384 // Number of filter taps of the station filters. 
+#define STATION_FFT_SIZE 1024 // The size of the FFT that the station filter does + +#include <complex> +#include <vector> + +namespace BandPass { + + +static const float stationFilterConstants[] = +{ + 36, 36, 35, 35, 34, 33, 32, 31, + 29, 28, 26, 25, 23, 21, 20, 18, + 17, 15, 14, 12, 11, 10, 9, 9, + 8, 8, 7, 7, 7, 7, 7, 7, + 7, 7, 8, 8, 8, 9, 9, 9, + 10, 10, 10, 10, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 12, 12, 12, 12, 12, 12, + 13, 13, 13, 13, 13, 13, 13, 13, + 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 15, 15, + 15, 15, 15, 15, 15, 15, 16, 16, + 16, 16, 16, 16, 16, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 17, 18, + 18, 18, 18, 18, 18, 18, 18, 18, + 19, 19, 19, 19, 19, 19, 19, 20, + 20, 20, 20, 20, 20, 20, 21, 21, + 21, 21, 21, 21, 21, 21, 22, 22, + 22, 22, 22, 22, 22, 22, 23, 23, + 23, 23, 23, 23, 23, 24, 24, 24, + 24, 24, 24, 25, 25, 25, 25, 25, + 25, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 27, 27, 28, + 28, 28, 28, 28, 28, 29, 29, 29, + 29, 29, 29, 30, 30, 30, 30, 30, + 31, 31, 31, 31, 31, 31, 32, 32, + 32, 32, 32, 32, 33, 33, 33, 33, + 33, 33, 34, 34, 34, 34, 34, 35, + 35, 35, 35, 35, 36, 36, 36, 36, + 36, 37, 37, 37, 37, 37, 37, 38, + 38, 38, 38, 38, 39, 39, 39, 39, + 39, 40, 40, 40, 40, 40, 41, 41, + 41, 41, 42, 42, 42, 42, 42, 43, + 43, 43, 43, 43, 44, 44, 44, 44, + 44, 45, 45, 45, 45, 46, 46, 46, + 46, 46, 47, 47, 47, 47, 48, 48, + 48, 48, 49, 49, 49, 49, 50, 50, + 50, 50, 50, 51, 51, 51, 51, 52, + 52, 52, 52, 53, 53, 53, 53, 54, + 54, 54, 54, 55, 55, 55, 55, 56, + 56, 56, 56, 57, 57, 57, 57, 58, + 58, 58, 58, 59, 59, 59, 59, 60, + 60, 60, 60, 61, 61, 61, 62, 62, + 62, 62, 63, 63, 63, 63, 64, 64, + 64, 65, 65, 65, 65, 66, 66, 66, + 67, 67, 67, 67, 68, 68, 68, 69, + 69, 69, 69, 70, 70, 70, 71, 71, + 71, 71, 72, 72, 72, 73, 73, 73, + 74, 74, 74, 74, 75, 75, 75, 76, + 76, 76, 77, 77, 77, 77, 78, 78, + 78, 79, 79, 79, 80, 80, 80, 81, + 81, 81, 82, 82, 82, 83, 83, 83, + 84, 84, 84, 
85, 85, 85, 85, 86, + 86, 86, 87, 87, 87, 88, 88, 88, + 89, 89, 89, 90, 90, 90, 91, 91, + 92, 92, 92, 93, 93, 93, 94, 94, + 94, 95, 95, 95, 96, 96, 96, 97, + 97, 97, 98, 98, 99, 99, 99, 100, + 100, 100, 101, 101, 101, 102, 102, 103, + 103, 103, 104, 104, 104, 105, 105, 105, + 106, 106, 107, 107, 107, 108, 108, 108, + 109, 109, 110, 110, 110, 111, 111, 112, + 112, 112, 113, 113, 113, 114, 114, 115, + 115, 115, 116, 116, 117, 117, 117, 118, + 118, 119, 119, 119, 120, 120, 121, 121, + 121, 122, 122, 123, 123, 123, 124, 124, + 125, 125, 125, 126, 126, 127, 127, 127, + 128, 128, 129, 129, 130, 130, 130, 131, + 131, 132, 132, 132, 133, 133, 134, 134, + 135, 135, 135, 136, 136, 137, 137, 138, + 138, 138, 139, 139, 140, 140, 141, 141, + 141, 142, 142, 143, 143, 144, 144, 145, + 145, 145, 146, 146, 147, 147, 148, 148, + 149, 149, 149, 150, 150, 151, 151, 152, + 152, 153, 153, 153, 154, 154, 155, 155, + 156, 156, 157, 157, 158, 158, 159, 159, + 159, 160, 160, 161, 161, 162, 162, 163, + 163, 164, 164, 165, 165, 165, 166, 166, + 167, 167, 168, 168, 169, 169, 170, 170, + 171, 171, 172, 172, 173, 173, 174, 174, + 174, 175, 175, 176, 176, 177, 177, 178, + 178, 179, 179, 180, 180, 181, 181, 182, + 182, 183, 183, 184, 184, 185, 185, 186, + 186, 187, 187, 188, 188, 189, 189, 190, + 190, 191, 191, 192, 192, 193, 193, 194, + 194, 195, 195, 196, 196, 197, 197, 198, + 198, 199, 199, 200, 200, 201, 201, 202, + 202, 203, 203, 204, 204, 205, 205, 206, + 206, 207, 207, 208, 208, 209, 209, 210, + 210, 211, 211, 212, 212, 213, 213, 214, + 214, 215, 215, 216, 217, 217, 218, 218, + 219, 219, 220, 220, 221, 221, 222, 222, + 223, 223, 224, 224, 225, 225, 226, 226, + 227, 227, 228, 229, 229, 230, 230, 231, + 231, 232, 232, 233, 233, 234, 234, 235, + 235, 236, 236, 237, 237, 238, 239, 239, + 240, 240, 241, 241, 242, 242, 243, 243, + 244, 244, 245, 245, 246, 247, 247, 248, + 248, 249, 249, 250, 250, 251, 251, 252, + 252, 253, 253, 254, 255, 255, 256, 256, + 257, 257, 258, 258, 259, 259, 260, 260, + 
261, 261, 262, 263, 263, 264, 264, 265, + 265, 266, 266, 267, 267, 268, 268, 269, + 270, 270, 271, 271, 272, 272, 273, 273, + 274, 274, 275, 275, 276, 277, 277, 278, + 278, 279, 279, 280, 280, 281, 281, 282, + 282, 283, 283, 284, 285, 285, 286, 286, + 287, 287, 288, 288, 289, 289, 290, 290, + 291, 291, 292, 293, 293, 294, 294, 295, + 295, 296, 296, 297, 297, 298, 298, 299, + 299, 300, 300, 301, 302, 302, 303, 303, + 304, 304, 305, 305, 306, 306, 307, 307, + 308, 308, 309, 309, 310, 311, 311, 312, + 312, 313, 313, 314, 314, 315, 315, 316, + 316, 317, 317, 318, 318, 319, 319, 320, + 320, 321, 321, 322, 322, 323, 323, 324, + 325, 325, 326, 326, 327, 327, 328, 328, + 329, 329, 330, 330, 331, 331, 332, 332, + 333, 333, 334, 334, 335, 335, 336, 336, + 337, 337, 338, 338, 339, 339, 340, 340, + 341, 341, 342, 342, 343, 343, 344, 344, + 345, 345, 346, 346, 347, 347, 348, 348, + 348, 349, 349, 350, 350, 351, 351, 352, + 352, 353, 353, 354, 354, 355, 355, 356, + 356, 357, 357, 358, 358, 359, 359, 359, + 360, 360, 361, 361, 362, 362, 363, 363, + 364, 364, 365, 365, 365, 366, 366, 367, + 367, 368, 368, 369, 369, 370, 370, 370, + 371, 371, 372, 372, 373, 373, 373, 374, + 374, 375, 375, 376, 376, 377, 377, 377, + 378, 378, 379, 379, 380, 380, 380, 381, + 381, 382, 382, 382, 383, 383, 384, 384, + 385, 385, 385, 386, 386, 387, 387, 387, + 388, 388, 389, 389, 389, 390, 390, 391, + 391, 391, 392, 392, 393, 393, 393, 394, + 394, 394, 395, 395, 396, 396, 396, 397, + 397, 397, 398, 398, 399, 399, 399, 400, + 400, 400, 401, 401, 401, 402, 402, 402, + 403, 403, 404, 404, 404, 405, 405, 405, + 406, 406, 406, 407, 407, 407, 408, 408, + 408, 409, 409, 409, 409, 410, 410, 410, + 411, 411, 411, 412, 412, 412, 413, 413, + 413, 414, 414, 414, 414, 415, 415, 415, + 416, 416, 416, 416, 417, 417, 417, 418, + 418, 418, 418, 419, 419, 419, 419, 420, + 420, 420, 420, 421, 421, 421, 421, 422, + 422, 422, 422, 423, 423, 423, 423, 424, + 424, 424, 424, 425, 425, 425, 425, 425, + 426, 426, 426, 426, 427, 
427, 427, 427, + 427, 428, 428, 428, 428, 428, 429, 429, + 429, 429, 429, 429, 430, 430, 430, 430, + 430, 430, 431, 431, 431, 431, 431, 431, + 432, 432, 432, 432, 432, 432, 432, 433, + 433, 433, 433, 433, 433, 433, 434, 434, + 434, 434, 434, 434, 434, 434, 434, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 434, 434, 434, 434, 434, 434, 434, + 434, 433, 433, 433, 433, 433, 433, 433, + 432, 432, 432, 432, 432, 432, 432, 431, + 431, 431, 431, 431, 430, 430, 430, 430, + 430, 429, 429, 429, 429, 429, 428, 428, + 428, 428, 428, 427, 427, 427, 427, 426, + 426, 426, 426, 425, 425, 425, 425, 424, + 424, 424, 424, 423, 423, 423, 423, 422, + 422, 422, 421, 421, 421, 420, 420, 420, + 420, 419, 419, 419, 418, 418, 418, 417, + 417, 417, 416, 416, 416, 415, 415, 415, + 414, 414, 413, 413, 413, 412, 412, 412, + 411, 411, 410, 410, 410, 409, 409, 409, + 408, 408, 407, 407, 406, 406, 406, 405, + 405, 404, 404, 403, 403, 403, 402, 402, + 401, 401, 400, 400, 399, 399, 398, 398, + 398, 397, 397, 396, 396, 395, 395, 394, + 394, 393, 393, 392, 392, 391, 391, 390, + 389, 389, 388, 388, 387, 387, 386, 386, + 385, 385, 384, 383, 383, 382, 382, 381, + 381, 380, 379, 379, 378, 378, 377, 376, + 376, 375, 375, 374, 373, 373, 372, 372, + 371, 370, 370, 369, 368, 368, 367, 367, + 366, 365, 365, 364, 363, 363, 362, 361, + 361, 360, 359, 358, 358, 357, 356, 356, + 355, 354, 354, 353, 352, 351, 351, 350, + 349, 349, 348, 347, 346, 346, 345, 344, + 343, 343, 342, 341, 340, 339, 339, 338, + 337, 336, 336, 335, 334, 333, 332, 332, + 331, 330, 329, 328, 328, 327, 326, 325, + 324, 
323, 323, 322, 321, 320, 319, 318, + 318, 317, 316, 315, 314, 313, 312, 311, + 311, 310, 309, 308, 307, 306, 305, 304, + 303, 302, 302, 301, 300, 299, 298, 297, + 296, 295, 294, 293, 292, 291, 290, 289, + 288, 288, 287, 286, 285, 284, 283, 282, + 281, 280, 279, 278, 277, 276, 275, 274, + 273, 272, 271, 270, 269, 268, 267, 266, + 265, 264, 262, 261, 260, 259, 258, 257, + 256, 255, 254, 253, 252, 251, 250, 249, + 248, 247, 245, 244, 243, 242, 241, 240, + 239, 238, 237, 236, 234, 233, 232, 231, + 230, 229, 228, 227, 225, 224, 223, 222, + 221, 220, 218, 217, 216, 215, 214, 213, + 211, 210, 209, 208, 207, 206, 204, 203, + 202, 201, 200, 198, 197, 196, 195, 193, + 192, 191, 190, 189, 187, 186, 185, 184, + 182, 181, 180, 179, 177, 176, 175, 174, + 172, 171, 170, 168, 167, 166, 165, 163, + 162, 161, 159, 158, 157, 155, 154, 153, + 152, 150, 149, 148, 146, 145, 144, 142, + 141, 140, 138, 137, 136, 134, 133, 132, + 130, 129, 127, 126, 125, 123, 122, 121, + 119, 118, 116, 115, 114, 112, 111, 109, + 108, 107, 105, 104, 102, 101, 100, 98, + 97, 95, 94, 93, 91, 90, 88, 87, + 85, 84, 82, 81, 80, 78, 77, 75, + 74, 72, 71, 69, 68, 66, 65, 64, + 62, 61, 59, 58, 56, 55, 53, 52, + 50, 49, 47, 46, 44, 43, 41, 40, + 38, 37, 35, 34, 32, 30, 29, 27, + 26, 24, 23, 21, 20, 18, 17, 15, + 14, 12, 10, 9, 7, 6, 4, 3, + 1, 0, -2, -4, -5, -7, -8, -10, + -11, -13, -15, -16, -18, -19, -21, -23, + -24, -26, -27, -29, -31, -32, -34, -35, + -37, -39, -40, -42, -43, -45, -47, -48, + -50, -52, -53, -55, -57, -58, -60, -61, + -63, -65, -66, -68, -70, -71, -73, -75, + -76, -78, -80, -81, -83, -84, -86, -88, + -89, -91, -93, -94, -96, -98, -99, -101, + -103, -105, -106, -108, -110, -111, -113, -115, + -116, -118, -120, -121, -123, -125, -126, -128, + -130, -132, -133, -135, -137, -138, -140, -142, + -143, -145, -147, -149, -150, -152, -154, -155, + -157, -159, -161, -162, -164, -166, -167, -169, + -171, -173, -174, -176, -178, -180, -181, -183, + -185, -186, -188, -190, -192, -193, -195, -197, + -199, 
-200, -202, -204, -206, -207, -209, -211, + -213, -214, -216, -218, -219, -221, -223, -225, + -226, -228, -230, -232, -233, -235, -237, -239, + -240, -242, -244, -246, -247, -249, -251, -253, + -255, -256, -258, -260, -262, -263, -265, -267, + -269, -270, -272, -274, -276, -277, -279, -281, + -283, -284, -286, -288, -290, -291, -293, -295, + -297, -299, -300, -302, -304, -306, -307, -309, + -311, -313, -314, -316, -318, -320, -321, -323, + -325, -327, -328, -330, -332, -334, -336, -337, + -339, -341, -343, -344, -346, -348, -350, -351, + -353, -355, -357, -358, -360, -362, -364, -365, + -367, -369, -371, -372, -374, -376, -378, -379, + -381, -383, -385, -386, -388, -390, -392, -393, + -395, -397, -399, -400, -402, -404, -406, -407, + -409, -411, -413, -414, -416, -418, -419, -421, + -423, -425, -426, -428, -430, -432, -433, -435, + -437, -438, -440, -442, -444, -445, -447, -449, + -450, -452, -454, -456, -457, -459, -461, -462, + -464, -466, -468, -469, -471, -473, -474, -476, + -478, -479, -481, -483, -485, -486, -488, -490, + -491, -493, -495, -496, -498, -500, -501, -503, + -505, -506, -508, -510, -511, -513, -515, -516, + -518, -520, -521, -523, -525, -526, -528, -530, + -531, -533, -534, -536, -538, -539, -541, -543, + -544, -546, -547, -549, -551, -552, -554, -556, + -557, -559, -560, -562, -564, -565, -567, -568, + -570, -572, -573, -575, -576, -578, -579, -581, + -583, -584, -586, -587, -589, -590, -592, -594, + -595, -597, -598, -600, -601, -603, -604, -606, + -607, -609, -610, -612, -614, -615, -617, -618, + -620, -621, -623, -624, -626, -627, -629, -630, + -632, -633, -635, -636, -638, -639, -640, -642, + -643, -645, -646, -648, -649, -651, -652, -654, + -655, -656, -658, -659, -661, -662, -664, -665, + -666, -668, -669, -671, -672, -673, -675, -676, + -678, -679, -680, -682, -683, -685, -686, -687, + -689, -690, -691, -693, -694, -695, -697, -698, + -699, -701, -702, -703, -705, -706, -707, -709, + -710, -711, -713, -714, -715, -716, -718, -719, + -720, 
-722, -723, -724, -725, -727, -728, -729, + -730, -732, -733, -734, -735, -736, -738, -739, + -740, -741, -743, -744, -745, -746, -747, -748, + -750, -751, -752, -753, -754, -755, -757, -758, + -759, -760, -761, -762, -763, -765, -766, -767, + -768, -769, -770, -771, -772, -773, -774, -776, + -777, -778, -779, -780, -781, -782, -783, -784, + -785, -786, -787, -788, -789, -790, -791, -792, + -793, -794, -795, -796, -797, -798, -799, -800, + -801, -802, -803, -804, -805, -806, -807, -808, + -808, -809, -810, -811, -812, -813, -814, -815, + -816, -816, -817, -818, -819, -820, -821, -822, + -822, -823, -824, -825, -826, -827, -827, -828, + -829, -830, -831, -831, -832, -833, -834, -834, + -835, -836, -837, -837, -838, -839, -840, -840, + -841, -842, -842, -843, -844, -844, -845, -846, + -846, -847, -848, -848, -849, -850, -850, -851, + -852, -852, -853, -853, -854, -855, -855, -856, + -856, -857, -857, -858, -859, -859, -860, -860, + -861, -861, -862, -862, -863, -863, -864, -864, + -865, -865, -866, -866, -867, -867, -867, -868, + -868, -869, -869, -870, -870, -870, -871, -871, + -871, -872, -872, -873, -873, -873, -874, -874, + -874, -875, -875, -875, -876, -876, -876, -876, + -877, -877, -877, -878, -878, -878, -878, -879, + -879, -879, -879, -879, -880, -880, -880, -880, + -880, -881, -881, -881, -881, -881, -881, -881, + -882, -882, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -883, -883, -883, -883, + -883, -883, -883, -883, -883, -883, -883, -883, + -883, -883, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -881, -881, + -881, -881, -881, -881, -880, -880, -880, -880, + -880, -879, -879, -879, -879, -879, -878, -878, + -878, -878, -877, -877, -877, -876, -876, -876, + -876, -875, -875, -875, -874, -874, -874, -873, + -873, -872, -872, -872, -871, -871, -870, -870, + -870, -869, -869, -868, -868, -867, -867, -866, + -866, -865, -865, -864, -864, -863, -863, -862, + -862, -861, -861, -860, -860, -859, -858, -858, + -857, 
-857, -856, -855, -855, -854, -854, -853, + -852, -852, -851, -850, -850, -849, -848, -847, + -847, -846, -845, -845, -844, -843, -842, -842, + -841, -840, -839, -838, -838, -837, -836, -835, + -834, -833, -833, -832, -831, -830, -829, -828, + -827, -827, -826, -825, -824, -823, -822, -821, + -820, -819, -818, -817, -816, -815, -814, -813, + -812, -811, -810, -809, -808, -807, -806, -805, + -804, -803, -802, -801, -800, -798, -797, -796, + -795, -794, -793, -792, -791, -789, -788, -787, + -786, -785, -783, -782, -781, -780, -779, -777, + -776, -775, -774, -772, -771, -770, -769, -767, + -766, -765, -763, -762, -761, -759, -758, -757, + -755, -754, -753, -751, -750, -748, -747, -746, + -744, -743, -741, -740, -738, -737, -735, -734, + -733, -731, -730, -728, -727, -725, -724, -722, + -720, -719, -717, -716, -714, -713, -711, -710, + -708, -706, -705, -703, -702, -700, -698, -697, + -695, -693, -692, -690, -688, -687, -685, -683, + -682, -680, -678, -676, -675, -673, -671, -669, + -668, -666, -664, -662, -661, -659, -657, -655, + -653, -651, -650, -648, -646, -644, -642, -640, + -639, -637, -635, -633, -631, -629, -627, -625, + -623, -621, -619, -617, -615, -614, -612, -610, + -608, -606, -604, -602, -600, -598, -596, -594, + -592, -589, -587, -585, -583, -581, -579, -577, + -575, -573, -571, -569, -567, -564, -562, -560, + -558, -556, -554, -552, -549, -547, -545, -543, + -541, -538, -536, -534, -532, -530, -527, -525, + -523, -521, -518, -516, -514, -512, -509, -507, + -505, -502, -500, -498, -495, -493, -491, -488, + -486, -484, -481, -479, -477, -474, -472, -469, + -467, -465, -462, -460, -457, -455, -453, -450, + -448, -445, -443, -440, -438, -435, -433, -430, + -428, -425, -423, -420, -418, -415, -413, -410, + -408, -405, -403, -400, -398, -395, -392, -390, + -387, -385, -382, -380, -377, -374, -372, -369, + -366, -364, -361, -359, -356, -353, -351, -348, + -345, -343, -340, -337, -334, -332, -329, -326, + -324, -321, -318, -315, -313, -310, -307, -305, + -302, 
-299, -296, -293, -291, -288, -285, -282, + -280, -277, -274, -271, -268, -266, -263, -260, + -257, -254, -251, -248, -246, -243, -240, -237, + -234, -231, -228, -226, -223, -220, -217, -214, + -211, -208, -205, -202, -199, -196, -194, -191, + -188, -185, -182, -179, -176, -173, -170, -167, + -164, -161, -158, -155, -152, -149, -146, -143, + -140, -137, -134, -131, -128, -125, -122, -119, + -116, -113, -110, -107, -104, -100, -97, -94, + -91, -88, -85, -82, -79, -76, -73, -70, + -67, -63, -60, -57, -54, -51, -48, -45, + -42, -39, -35, -32, -29, -26, -23, -20, + -16, -13, -10, -7, -4, -1, 2, 6, + 9, 12, 15, 18, 22, 25, 28, 31, + 34, 38, 41, 44, 47, 50, 54, 57, + 60, 63, 67, 70, 73, 76, 79, 83, + 86, 89, 92, 96, 99, 102, 105, 109, + 112, 115, 118, 122, 125, 128, 132, 135, + 138, 141, 145, 148, 151, 154, 158, 161, + 164, 168, 171, 174, 178, 181, 184, 187, + 191, 194, 197, 201, 204, 207, 211, 214, + 217, 221, 224, 227, 230, 234, 237, 240, + 244, 247, 250, 254, 257, 260, 264, 267, + 270, 274, 277, 280, 284, 287, 290, 294, + 297, 300, 304, 307, 310, 314, 317, 320, + 324, 327, 330, 334, 337, 341, 344, 347, + 351, 354, 357, 361, 364, 367, 371, 374, + 377, 381, 384, 387, 391, 394, 397, 401, + 404, 407, 411, 414, 417, 421, 424, 427, + 431, 434, 437, 441, 444, 448, 451, 454, + 458, 461, 464, 468, 471, 474, 478, 481, + 484, 488, 491, 494, 498, 501, 504, 507, + 511, 514, 517, 521, 524, 527, 531, 534, + 537, 541, 544, 547, 551, 554, 557, 560, + 564, 567, 570, 574, 577, 580, 584, 587, + 590, 593, 597, 600, 603, 606, 610, 613, + 616, 620, 623, 626, 629, 633, 636, 639, + 642, 646, 649, 652, 655, 659, 662, 665, + 668, 671, 675, 678, 681, 684, 688, 691, + 694, 697, 700, 704, 707, 710, 713, 716, + 720, 723, 726, 729, 732, 735, 739, 742, + 745, 748, 751, 754, 758, 761, 764, 767, + 770, 773, 776, 779, 783, 786, 789, 792, + 795, 798, 801, 804, 807, 810, 813, 817, + 820, 823, 826, 829, 832, 835, 838, 841, + 844, 847, 850, 853, 856, 859, 862, 865, + 868, 871, 874, 877, 880, 883, 886, 889, 
+ 892, 895, 898, 901, 904, 907, 910, 913, + 915, 918, 921, 924, 927, 930, 933, 936, + 939, 942, 944, 947, 950, 953, 956, 959, + 961, 964, 967, 970, 973, 976, 978, 981, + 984, 987, 990, 992, 995, 998, 1001, 1003, + 1006, 1009, 1012, 1014, 1017, 1020, 1022, 1025, + 1028, 1030, 1033, 1036, 1038, 1041, 1044, 1046, + 1049, 1052, 1054, 1057, 1060, 1062, 1065, 1067, + 1070, 1073, 1075, 1078, 1080, 1083, 1085, 1088, + 1090, 1093, 1095, 1098, 1100, 1103, 1105, 1108, + 1110, 1113, 1115, 1118, 1120, 1123, 1125, 1128, + 1130, 1132, 1135, 1137, 1140, 1142, 1144, 1147, + 1149, 1151, 1154, 1156, 1158, 1161, 1163, 1165, + 1168, 1170, 1172, 1174, 1177, 1179, 1181, 1183, + 1186, 1188, 1190, 1192, 1195, 1197, 1199, 1201, + 1203, 1205, 1208, 1210, 1212, 1214, 1216, 1218, + 1220, 1222, 1224, 1227, 1229, 1231, 1233, 1235, + 1237, 1239, 1241, 1243, 1245, 1247, 1249, 1251, + 1253, 1255, 1257, 1258, 1260, 1262, 1264, 1266, + 1268, 1270, 1272, 1274, 1275, 1277, 1279, 1281, + 1283, 1284, 1286, 1288, 1290, 1292, 1293, 1295, + 1297, 1299, 1300, 1302, 1304, 1305, 1307, 1309, + 1310, 1312, 1314, 1315, 1317, 1318, 1320, 1322, + 1323, 1325, 1326, 1328, 1329, 1331, 1332, 1334, + 1335, 1337, 1338, 1340, 1341, 1343, 1344, 1346, + 1347, 1348, 1350, 1351, 1353, 1354, 1355, 1357, + 1358, 1359, 1361, 1362, 1363, 1364, 1366, 1367, + 1368, 1369, 1371, 1372, 1373, 1374, 1375, 1377, + 1378, 1379, 1380, 1381, 1382, 1383, 1384, 1386, + 1387, 1388, 1389, 1390, 1391, 1392, 1393, 1394, + 1395, 1396, 1397, 1398, 1399, 1400, 1400, 1401, + 1402, 1403, 1404, 1405, 1406, 1406, 1407, 1408, + 1409, 1410, 1410, 1411, 1412, 1413, 1413, 1414, + 1415, 1416, 1416, 1417, 1418, 1418, 1419, 1420, + 1420, 1421, 1421, 1422, 1422, 1423, 1424, 1424, + 1425, 1425, 1426, 1426, 1427, 1427, 1427, 1428, + 1428, 1429, 1429, 1429, 1430, 1430, 1431, 1431, + 1431, 1432, 1432, 1432, 1432, 1433, 1433, 1433, + 1433, 1434, 1434, 1434, 1434, 1434, 1434, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 
1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1434, 1434, 1434, + 1434, 1434, 1434, 1433, 1433, 1433, 1433, 1432, + 1432, 1432, 1432, 1431, 1431, 1431, 1430, 1430, + 1429, 1429, 1429, 1428, 1428, 1427, 1427, 1426, + 1426, 1425, 1425, 1424, 1424, 1423, 1423, 1422, + 1422, 1421, 1420, 1420, 1419, 1419, 1418, 1417, + 1416, 1416, 1415, 1414, 1414, 1413, 1412, 1411, + 1410, 1410, 1409, 1408, 1407, 1406, 1405, 1405, + 1404, 1403, 1402, 1401, 1400, 1399, 1398, 1397, + 1396, 1395, 1394, 1393, 1392, 1391, 1390, 1389, + 1387, 1386, 1385, 1384, 1383, 1382, 1380, 1379, + 1378, 1377, 1376, 1374, 1373, 1372, 1370, 1369, + 1368, 1367, 1365, 1364, 1362, 1361, 1360, 1358, + 1357, 1355, 1354, 1352, 1351, 1349, 1348, 1346, + 1345, 1343, 1342, 1340, 1339, 1337, 1335, 1334, + 1332, 1330, 1329, 1327, 1325, 1324, 1322, 1320, + 1319, 1317, 1315, 1313, 1311, 1310, 1308, 1306, + 1304, 1302, 1300, 1298, 1297, 1295, 1293, 1291, + 1289, 1287, 1285, 1283, 1281, 1279, 1277, 1275, + 1273, 1271, 1268, 1266, 1264, 1262, 1260, 1258, + 1256, 1253, 1251, 1249, 1247, 1245, 1242, 1240, + 1238, 1236, 1233, 1231, 1229, 1226, 1224, 1222, + 1219, 1217, 1214, 1212, 1210, 1207, 1205, 1202, + 1200, 1197, 1195, 1192, 1190, 1187, 1185, 1182, + 1179, 1177, 1174, 1172, 1169, 1166, 1164, 1161, + 1158, 1156, 1153, 1150, 1148, 1145, 1142, 1139, + 1136, 1134, 1131, 1128, 1125, 1122, 1119, 1117, + 1114, 1111, 1108, 1105, 1102, 1099, 1096, 1093, + 1090, 1087, 1084, 1081, 1078, 1075, 1072, 1069, + 1066, 1063, 1060, 1057, 1053, 1050, 1047, 1044, + 1041, 1038, 1034, 1031, 1028, 1025, 1021, 1018, + 1015, 1012, 1008, 1005, 1002, 998, 995, 992, + 988, 985, 981, 978, 975, 971, 968, 964, + 961, 957, 954, 950, 947, 943, 940, 936, + 933, 929, 926, 922, 918, 915, 911, 908, + 904, 900, 897, 893, 889, 885, 882, 878, + 874, 871, 867, 863, 859, 855, 852, 848, + 844, 840, 836, 832, 829, 825, 821, 817, + 813, 809, 805, 801, 797, 793, 789, 785, + 781, 777, 773, 769, 765, 761, 757, 753, + 749, 745, 741, 737, 732, 728, 724, 720, + 716, 712, 
707, 703, 699, 695, 691, 686, + 682, 678, 674, 669, 665, 661, 656, 652, + 648, 643, 639, 635, 630, 626, 622, 617, + 613, 608, 604, 600, 595, 591, 586, 582, + 577, 573, 568, 564, 559, 555, 550, 546, + 541, 537, 532, 528, 523, 518, 514, 509, + 505, 500, 495, 491, 486, 481, 477, 472, + 467, 463, 458, 453, 448, 444, 439, 434, + 429, 425, 420, 415, 410, 406, 401, 396, + 391, 386, 382, 377, 372, 367, 362, 357, + 352, 347, 343, 338, 333, 328, 323, 318, + 313, 308, 303, 298, 293, 288, 283, 278, + 273, 268, 263, 258, 253, 248, 243, 238, + 233, 228, 223, 218, 213, 208, 203, 198, + 193, 187, 182, 177, 172, 167, 162, 157, + 152, 146, 141, 136, 131, 126, 121, 115, + 110, 105, 100, 95, 89, 84, 79, 74, + 68, 63, 58, 53, 47, 42, 37, 32, + 26, 21, 16, 10, 5, 0, -5, -11, + -16, -21, -27, -32, -37, -43, -48, -53, + -59, -64, -70, -75, -80, -86, -91, -96, + -102, -107, -113, -118, -123, -129, -134, -140, + -145, -150, -156, -161, -167, -172, -178, -183, + -188, -194, -199, -205, -210, -216, -221, -227, + -232, -238, -243, -248, -254, -259, -265, -270, + -276, -281, -287, -292, -298, -303, -309, -314, + -320, -325, -331, -336, -342, -347, -353, -358, + -364, -369, -375, -381, -386, -392, -397, -403, + -408, -414, -419, -425, -430, -436, -441, -447, + -452, -458, -464, -469, -475, -480, -486, -491, + -497, -502, -508, -513, -519, -524, -530, -536, + -541, -547, -552, -558, -563, -569, -574, -580, + -585, -591, -597, -602, -608, -613, -619, -624, + -630, -635, -641, -646, -652, -658, -663, -669, + -674, -680, -685, -691, -696, -702, -707, -713, + -718, -724, -729, -735, -740, -746, -751, -757, + -762, -768, -773, -779, -784, -790, -795, -801, + -806, -812, -817, -823, -828, -834, -839, -845, + -850, -856, -861, -867, -872, -877, -883, -888, + -894, -899, -905, -910, -916, -921, -926, -932, + -937, -943, -948, -953, -959, -964, -970, -975, + -980, -986, -991, -997, -1002, -1007, -1013, -1018, + -1023, -1029, -1034, -1039, -1045, -1050, -1055, -1061, + -1066, -1071, -1077, -1082, -1087, 
-1092, -1098, -1103, + -1108, -1114, -1119, -1124, -1129, -1135, -1140, -1145, + -1150, -1155, -1161, -1166, -1171, -1176, -1181, -1187, + -1192, -1197, -1202, -1207, -1212, -1218, -1223, -1228, + -1233, -1238, -1243, -1248, -1253, -1258, -1264, -1269, + -1274, -1279, -1284, -1289, -1294, -1299, -1304, -1309, + -1314, -1319, -1324, -1329, -1334, -1339, -1344, -1349, + -1354, -1359, -1364, -1369, -1374, -1378, -1383, -1388, + -1393, -1398, -1403, -1408, -1413, -1417, -1422, -1427, + -1432, -1437, -1441, -1446, -1451, -1456, -1461, -1465, + -1470, -1475, -1480, -1484, -1489, -1494, -1498, -1503, + -1508, -1512, -1517, -1522, -1526, -1531, -1535, -1540, + -1545, -1549, -1554, -1558, -1563, -1567, -1572, -1577, + -1581, -1586, -1590, -1594, -1599, -1603, -1608, -1612, + -1617, -1621, -1625, -1630, -1634, -1639, -1643, -1647, + -1652, -1656, -1660, -1665, -1669, -1673, -1677, -1682, + -1686, -1690, -1694, -1699, -1703, -1707, -1711, -1715, + -1719, -1724, -1728, -1732, -1736, -1740, -1744, -1748, + -1752, -1756, -1760, -1764, -1768, -1772, -1776, -1780, + -1784, -1788, -1792, -1796, -1800, -1804, -1807, -1811, + -1815, -1819, -1823, -1827, -1830, -1834, -1838, -1842, + -1845, -1849, -1853, -1857, -1860, -1864, -1868, -1871, + -1875, -1878, -1882, -1886, -1889, -1893, -1896, -1900, + -1903, -1907, -1910, -1914, -1917, -1921, -1924, -1927, + -1931, -1934, -1938, -1941, -1944, -1948, -1951, -1954, + -1957, -1961, -1964, -1967, -1970, -1974, -1977, -1980, + -1983, -1986, -1989, -1992, -1996, -1999, -2002, -2005, + -2008, -2011, -2014, -2017, -2020, -2023, -2026, -2029, + -2031, -2034, -2037, -2040, -2043, -2046, -2049, -2051, + -2054, -2057, -2060, -2062, -2065, -2068, -2070, -2073, + -2076, -2078, -2081, -2083, -2086, -2089, -2091, -2094, + -2096, -2099, -2101, -2104, -2106, -2108, -2111, -2113, + -2116, -2118, -2120, -2122, -2125, -2127, -2129, -2132, + -2134, -2136, -2138, -2140, -2142, -2145, -2147, -2149, + -2151, -2153, -2155, -2157, -2159, -2161, -2163, -2165, + 
-2167, -2169, -2171, -2173, -2174, -2176, -2178, -2180, + -2182, -2184, -2185, -2187, -2189, -2190, -2192, -2194, + -2195, -2197, -2199, -2200, -2202, -2203, -2205, -2206, + -2208, -2209, -2211, -2212, -2214, -2215, -2216, -2218, + -2219, -2220, -2222, -2223, -2224, -2225, -2227, -2228, + -2229, -2230, -2231, -2232, -2233, -2235, -2236, -2237, + -2238, -2239, -2240, -2241, -2242, -2242, -2243, -2244, + -2245, -2246, -2247, -2248, -2248, -2249, -2250, -2251, + -2251, -2252, -2253, -2253, -2254, -2254, -2255, -2256, + -2256, -2257, -2257, -2258, -2258, -2259, -2259, -2259, + -2260, -2260, -2260, -2261, -2261, -2261, -2261, -2262, + -2262, -2262, -2262, -2262, -2263, -2263, -2263, -2263, + -2263, -2263, -2263, -2263, -2263, -2263, -2263, -2263, + -2262, -2262, -2262, -2262, -2262, -2261, -2261, -2261, + -2261, -2260, -2260, -2260, -2259, -2259, -2258, -2258, + -2257, -2257, -2256, -2256, -2255, -2255, -2254, -2254, + -2253, -2252, -2252, -2251, -2250, -2249, -2249, -2248, + -2247, -2246, -2245, -2244, -2244, -2243, -2242, -2241, + -2240, -2239, -2238, -2237, -2236, -2234, -2233, -2232, + -2231, -2230, -2229, -2227, -2226, -2225, -2224, -2222, + -2221, -2220, -2218, -2217, -2215, -2214, -2212, -2211, + -2209, -2208, -2206, -2205, -2203, -2202, -2200, -2198, + -2197, -2195, -2193, -2191, -2190, -2188, -2186, -2184, + -2182, -2180, -2179, -2177, -2175, -2173, -2171, -2169, + -2167, -2165, -2162, -2160, -2158, -2156, -2154, -2152, + -2150, -2147, -2145, -2143, -2140, -2138, -2136, -2133, + -2131, -2129, -2126, -2124, -2121, -2119, -2116, -2114, + -2111, -2108, -2106, -2103, -2101, -2098, -2095, -2092, + -2090, -2087, -2084, -2081, -2079, -2076, -2073, -2070, + -2067, -2064, -2061, -2058, -2055, -2052, -2049, -2046, + -2043, -2040, -2037, -2033, -2030, -2027, -2024, -2021, + -2017, -2014, -2011, -2007, -2004, -2001, -1997, -1994, + -1990, -1987, -1983, -1980, -1976, -1973, -1969, -1966, + -1962, -1958, -1955, -1951, -1947, -1944, -1940, -1936, + -1932, -1928, -1925, -1921, 
-1917, -1913, -1909, -1905, + -1901, -1897, -1893, -1889, -1885, -1881, -1877, -1873, + -1869, -1864, -1860, -1856, -1852, -1848, -1843, -1839, + -1835, -1830, -1826, -1822, -1817, -1813, -1809, -1804, + -1800, -1795, -1791, -1786, -1782, -1777, -1772, -1768, + -1763, -1758, -1754, -1749, -1744, -1740, -1735, -1730, + -1725, -1720, -1716, -1711, -1706, -1701, -1696, -1691, + -1686, -1681, -1676, -1671, -1666, -1661, -1656, -1651, + -1646, -1640, -1635, -1630, -1625, -1620, -1614, -1609, + -1604, -1599, -1593, -1588, -1582, -1577, -1572, -1566, + -1561, -1555, -1550, -1544, -1539, -1533, -1528, -1522, + -1516, -1511, -1505, -1500, -1494, -1488, -1482, -1477, + -1471, -1465, -1459, -1454, -1448, -1442, -1436, -1430, + -1424, -1418, -1412, -1406, -1400, -1394, -1388, -1382, + -1376, -1370, -1364, -1358, -1352, -1346, -1339, -1333, + -1327, -1321, -1315, -1308, -1302, -1296, -1289, -1283, + -1277, -1270, -1264, -1258, -1251, -1245, -1238, -1232, + -1225, -1219, -1212, -1206, -1199, -1193, -1186, -1179, + -1173, -1166, -1159, -1153, -1146, -1139, -1133, -1126, + -1119, -1112, -1105, -1099, -1092, -1085, -1078, -1071, + -1064, -1057, -1050, -1044, -1037, -1030, -1023, -1016, + -1009, -1001, -994, -987, -980, -973, -966, -959, + -952, -945, -937, -930, -923, -916, -908, -901, + -894, -887, -879, -872, -865, -857, -850, -842, + -835, -828, -820, -813, -805, -798, -790, -783, + -775, -768, -760, -753, -745, -738, -730, -722, + -715, -707, -699, -692, -684, -676, -669, -661, + -653, -645, -638, -630, -622, -614, -606, -599, + -591, -583, -575, -567, -559, -551, -543, -536, + -528, -520, -512, -504, -496, -488, -480, -472, + -464, -456, -447, -439, -431, -423, -415, -407, + -399, -391, -382, -374, -366, -358, -350, -342, + -333, -325, -317, -309, -300, -292, -284, -275, + -267, -259, -250, -242, -234, -225, -217, -209, + -200, -192, -183, -175, -167, -158, -150, -141, + -133, -124, -116, -107, -99, -90, -82, -73, + -65, -56, -48, -39, -30, -22, -13, -5, + 4, 12, 21, 30, 38, 
47, 56, 64, + 73, 82, 90, 99, 108, 116, 125, 134, + 143, 151, 160, 169, 178, 186, 195, 204, + 213, 221, 230, 239, 248, 257, 265, 274, + 283, 292, 301, 310, 318, 327, 336, 345, + 354, 363, 372, 381, 389, 398, 407, 416, + 425, 434, 443, 452, 461, 470, 479, 488, + 496, 505, 514, 523, 532, 541, 550, 559, + 568, 577, 586, 595, 604, 613, 622, 631, + 640, 649, 658, 667, 676, 685, 694, 703, + 712, 721, 730, 739, 748, 757, 766, 775, + 784, 793, 802, 811, 820, 829, 838, 847, + 856, 865, 874, 883, 892, 901, 910, 920, + 929, 938, 947, 956, 965, 974, 983, 992, + 1001, 1010, 1019, 1028, 1037, 1046, 1055, 1064, + 1073, 1082, 1091, 1100, 1109, 1118, 1127, 1136, + 1145, 1154, 1163, 1172, 1181, 1190, 1199, 1208, + 1217, 1226, 1235, 1244, 1253, 1262, 1271, 1280, + 1289, 1298, 1307, 1316, 1324, 1333, 1342, 1351, + 1360, 1369, 1378, 1387, 1396, 1405, 1414, 1423, + 1431, 1440, 1449, 1458, 1467, 1476, 1485, 1493, + 1502, 1511, 1520, 1529, 1538, 1546, 1555, 1564, + 1573, 1582, 1590, 1599, 1608, 1617, 1625, 1634, + 1643, 1652, 1660, 1669, 1678, 1687, 1695, 1704, + 1713, 1721, 1730, 1739, 1747, 1756, 1765, 1773, + 1782, 1790, 1799, 1808, 1816, 1825, 1833, 1842, + 1850, 1859, 1867, 1876, 1884, 1893, 1901, 1910, + 1918, 1927, 1935, 1944, 1952, 1961, 1969, 1977, + 1986, 1994, 2002, 2011, 2019, 2027, 2036, 2044, + 2052, 2061, 2069, 2077, 2085, 2094, 2102, 2110, + 2118, 2127, 2135, 2143, 2151, 2159, 2167, 2175, + 2183, 2192, 2200, 2208, 2216, 2224, 2232, 2240, + 2248, 2256, 2264, 2272, 2280, 2288, 2296, 2303, + 2311, 2319, 2327, 2335, 2343, 2351, 2358, 2366, + 2374, 2382, 2389, 2397, 2405, 2412, 2420, 2428, + 2435, 2443, 2451, 2458, 2466, 2473, 2481, 2489, + 2496, 2504, 2511, 2518, 2526, 2533, 2541, 2548, + 2556, 2563, 2570, 2578, 2585, 2592, 2599, 2607, + 2614, 2621, 2628, 2636, 2643, 2650, 2657, 2664, + 2671, 2678, 2685, 2692, 2700, 2707, 2714, 2720, + 2727, 2734, 2741, 2748, 2755, 2762, 2769, 2776, + 2782, 2789, 2796, 2803, 2809, 2816, 2823, 2829, + 2836, 2843, 2849, 2856, 2862, 2869, 2875, 
2882, + 2888, 2895, 2901, 2908, 2914, 2920, 2927, 2933, + 2939, 2945, 2952, 2958, 2964, 2970, 2977, 2983, + 2989, 2995, 3001, 3007, 3013, 3019, 3025, 3031, + 3037, 3043, 3049, 3055, 3060, 3066, 3072, 3078, + 3084, 3089, 3095, 3101, 3106, 3112, 3118, 3123, + 3129, 3134, 3140, 3145, 3151, 3156, 3162, 3167, + 3172, 3178, 3183, 3188, 3193, 3199, 3204, 3209, + 3214, 3219, 3224, 3230, 3235, 3240, 3245, 3250, + 3255, 3260, 3264, 3269, 3274, 3279, 3284, 3289, + 3293, 3298, 3303, 3307, 3312, 3317, 3321, 3326, + 3330, 3335, 3339, 3344, 3348, 3352, 3357, 3361, + 3365, 3370, 3374, 3378, 3382, 3387, 3391, 3395, + 3399, 3403, 3407, 3411, 3415, 3419, 3423, 3427, + 3430, 3434, 3438, 3442, 3446, 3449, 3453, 3457, + 3460, 3464, 3467, 3471, 3474, 3478, 3481, 3485, + 3488, 3491, 3495, 3498, 3501, 3505, 3508, 3511, + 3514, 3517, 3520, 3523, 3526, 3529, 3532, 3535, + 3538, 3541, 3544, 3547, 3549, 3552, 3555, 3557, + 3560, 3563, 3565, 3568, 3570, 3573, 3575, 3578, + 3580, 3583, 3585, 3587, 3589, 3592, 3594, 3596, + 3598, 3600, 3602, 3604, 3606, 3608, 3610, 3612, + 3614, 3616, 3618, 3620, 3621, 3623, 3625, 3626, + 3628, 3630, 3631, 3633, 3634, 3635, 3637, 3638, + 3640, 3641, 3642, 3643, 3645, 3646, 3647, 3648, + 3649, 3650, 3651, 3652, 3653, 3654, 3655, 3656, + 3657, 3657, 3658, 3659, 3659, 3660, 3661, 3661, + 3662, 3662, 3663, 3663, 3664, 3664, 3664, 3665, + 3665, 3665, 3665, 3665, 3665, 3666, 3666, 3666, + 3666, 3666, 3665, 3665, 3665, 3665, 3665, 3664, + 3664, 3664, 3663, 3663, 3663, 3662, 3662, 3661, + 3660, 3660, 3659, 3658, 3658, 3657, 3656, 3655, + 3654, 3653, 3652, 3651, 3650, 3649, 3648, 3647, + 3646, 3645, 3644, 3642, 3641, 3640, 3638, 3637, + 3635, 3634, 3632, 3631, 3629, 3627, 3626, 3624, + 3622, 3621, 3619, 3617, 3615, 3613, 3611, 3609, + 3607, 3605, 3603, 3601, 3598, 3596, 3594, 3592, + 3589, 3587, 3585, 3582, 3580, 3577, 3575, 3572, + 3569, 3567, 3564, 3561, 3558, 3556, 3553, 3550, + 3547, 3544, 3541, 3538, 3535, 3532, 3528, 3525, + 3522, 3519, 3515, 3512, 3509, 3505, 3502, 
3498, + 3495, 3491, 3488, 3484, 3480, 3477, 3473, 3469, + 3465, 3461, 3458, 3454, 3450, 3446, 3442, 3437, + 3433, 3429, 3425, 3421, 3416, 3412, 3408, 3403, + 3399, 3395, 3390, 3385, 3381, 3376, 3372, 3367, + 3362, 3357, 3353, 3348, 3343, 3338, 3333, 3328, + 3323, 3318, 3313, 3308, 3303, 3297, 3292, 3287, + 3282, 3276, 3271, 3265, 3260, 3254, 3249, 3243, + 3238, 3232, 3226, 3221, 3215, 3209, 3203, 3197, + 3191, 3185, 3179, 3173, 3167, 3161, 3155, 3149, + 3143, 3136, 3130, 3124, 3117, 3111, 3105, 3098, + 3092, 3085, 3079, 3072, 3065, 3059, 3052, 3045, + 3038, 3031, 3025, 3018, 3011, 3004, 2997, 2990, + 2983, 2975, 2968, 2961, 2954, 2947, 2939, 2932, + 2925, 2917, 2910, 2902, 2895, 2887, 2879, 2872, + 2864, 2856, 2849, 2841, 2833, 2825, 2817, 2809, + 2801, 2793, 2785, 2777, 2769, 2761, 2753, 2745, + 2737, 2728, 2720, 2712, 2703, 2695, 2686, 2678, + 2669, 2661, 2652, 2644, 2635, 2626, 2617, 2609, + 2600, 2591, 2582, 2573, 2564, 2555, 2546, 2537, + 2528, 2519, 2510, 2501, 2491, 2482, 2473, 2464, + 2454, 2445, 2435, 2426, 2417, 2407, 2397, 2388, + 2378, 2369, 2359, 2349, 2339, 2330, 2320, 2310, + 2300, 2290, 2280, 2270, 2260, 2250, 2240, 2230, + 2220, 2209, 2199, 2189, 2179, 2168, 2158, 2148, + 2137, 2127, 2116, 2106, 2095, 2085, 2074, 2063, + 2053, 2042, 2031, 2021, 2010, 1999, 1988, 1977, + 1966, 1955, 1944, 1933, 1922, 1911, 1900, 1889, + 1878, 1867, 1855, 1844, 1833, 1821, 1810, 1799, + 1787, 1776, 1764, 1753, 1741, 1730, 1718, 1707, + 1695, 1683, 1671, 1660, 1648, 1636, 1624, 1612, + 1601, 1589, 1577, 1565, 1553, 1541, 1529, 1517, + 1504, 1492, 1480, 1468, 1456, 1443, 1431, 1419, + 1407, 1394, 1382, 1369, 1357, 1344, 1332, 1319, + 1307, 1294, 1282, 1269, 1256, 1244, 1231, 1218, + 1205, 1193, 1180, 1167, 1154, 1141, 1128, 1115, + 1102, 1089, 1076, 1063, 1050, 1037, 1024, 1011, + 998, 984, 971, 958, 945, 931, 918, 905, + 891, 878, 864, 851, 838, 824, 811, 797, + 783, 770, 756, 743, 729, 715, 702, 688, + 674, 660, 647, 633, 619, 605, 591, 577, + 563, 549, 535, 521, 
507, 493, 479, 465, + 451, 437, 423, 409, 395, 380, 366, 352, + 338, 323, 309, 295, 280, 266, 252, 237, + 223, 209, 194, 180, 165, 151, 136, 122, + 107, 92, 78, 63, 49, 34, 19, 5, + -10, -25, -40, -54, -69, -84, -99, -114, + -128, -143, -158, -173, -188, -203, -218, -233, + -248, -263, -278, -293, -308, -323, -338, -353, + -368, -383, -398, -413, -428, -443, -459, -474, + -489, -504, -519, -535, -550, -565, -580, -596, + -611, -626, -642, -657, -672, -688, -703, -718, + -734, -749, -765, -780, -795, -811, -826, -842, + -857, -873, -888, -904, -919, -935, -950, -966, + -981, -997, -1013, -1028, -1044, -1059, -1075, -1091, + -1106, -1122, -1137, -1153, -1169, -1184, -1200, -1216, + -1231, -1247, -1263, -1278, -1294, -1310, -1326, -1341, + -1357, -1373, -1389, -1404, -1420, -1436, -1452, -1467, + -1483, -1499, -1515, -1531, -1546, -1562, -1578, -1594, + -1610, -1625, -1641, -1657, -1673, -1689, -1705, -1720, + -1736, -1752, -1768, -1784, -1800, -1815, -1831, -1847, + -1863, -1879, -1895, -1911, -1926, -1942, -1958, -1974, + -1990, -2006, -2022, -2037, -2053, -2069, -2085, -2101, + -2117, -2133, -2148, -2164, -2180, -2196, -2212, -2228, + -2244, -2259, -2275, -2291, -2307, -2323, -2339, -2354, + -2370, -2386, -2402, -2418, -2433, -2449, -2465, -2481, + -2497, -2512, -2528, -2544, -2560, -2576, -2591, -2607, + -2623, -2639, -2654, -2670, -2686, -2701, -2717, -2733, + -2749, -2764, -2780, -2796, -2811, -2827, -2843, -2858, + -2874, -2890, -2905, -2921, -2936, -2952, -2968, -2983, + -2999, -3014, -3030, -3045, -3061, -3076, -3092, -3107, + -3123, -3138, -3154, -3169, -3185, -3200, -3216, -3231, + -3246, -3262, -3277, -3293, -3308, -3323, -3339, -3354, + -3369, -3385, -3400, -3415, -3430, -3446, -3461, -3476, + -3491, -3506, -3521, -3537, -3552, -3567, -3582, -3597, + -3612, -3627, -3642, -3657, -3672, -3687, -3702, -3717, + -3732, -3747, -3762, -3777, -3792, -3807, -3821, -3836, + -3851, -3866, -3881, -3895, -3910, -3925, -3939, -3954, + -3969, -3983, -3998, -4012, -4027, 
-4042, -4056, -4071, + -4085, -4100, -4114, -4128, -4143, -4157, -4172, -4186, + -4200, -4215, -4229, -4243, -4257, -4271, -4286, -4300, + -4314, -4328, -4342, -4356, -4370, -4384, -4398, -4412, + -4426, -4440, -4454, -4468, -4482, -4495, -4509, -4523, + -4537, -4550, -4564, -4578, -4591, -4605, -4619, -4632, + -4646, -4659, -4673, -4686, -4699, -4713, -4726, -4740, + -4753, -4766, -4779, -4793, -4806, -4819, -4832, -4845, + -4858, -4871, -4884, -4897, -4910, -4923, -4936, -4949, + -4962, -4974, -4987, -5000, -5013, -5025, -5038, -5051, + -5063, -5076, -5088, -5101, -5113, -5125, -5138, -5150, + -5162, -5175, -5187, -5199, -5211, -5224, -5236, -5248, + -5260, -5272, -5284, -5296, -5308, -5319, -5331, -5343, + -5355, -5366, -5378, -5390, -5401, -5413, -5424, -5436, + -5447, -5459, -5470, -5482, -5493, -5504, -5515, -5527, + -5538, -5549, -5560, -5571, -5582, -5593, -5604, -5615, + -5626, -5636, -5647, -5658, -5668, -5679, -5690, -5700, + -5711, -5721, -5732, -5742, -5752, -5763, -5773, -5783, + -5793, -5804, -5814, -5824, -5834, -5844, -5854, -5863, + -5873, -5883, -5893, -5903, -5912, -5922, -5931, -5941, + -5950, -5960, -5969, -5979, -5988, -5997, -6006, -6016, + -6025, -6034, -6043, -6052, -6061, -6070, -6078, -6087, + -6096, -6105, -6113, -6122, -6131, -6139, -6148, -6156, + -6164, -6173, -6181, -6189, -6197, -6205, -6214, -6222, + -6230, -6238, -6245, -6253, -6261, -6269, -6276, -6284, + -6292, -6299, -6307, -6314, -6321, -6329, -6336, -6343, + -6350, -6358, -6365, -6372, -6379, -6386, -6392, -6399, + -6406, -6413, -6419, -6426, -6433, -6439, -6445, -6452, + -6458, -6464, -6471, -6477, -6483, -6489, -6495, -6501, + -6507, -6513, -6519, -6524, -6530, -6536, -6541, -6547, + -6552, -6558, -6563, -6568, -6573, -6579, -6584, -6589, + -6594, -6599, -6604, -6608, -6613, -6618, -6623, -6627, + -6632, -6636, -6641, -6645, -6649, -6654, -6658, -6662, + -6666, -6670, -6674, -6678, -6682, -6685, -6689, -6693, + -6696, -6700, -6703, -6707, -6710, -6714, -6717, -6720, + 
-6723, -6726, -6729, -6732, -6735, -6738, -6741, -6743, + -6746, -6748, -6751, -6753, -6756, -6758, -6760, -6763, + -6765, -6767, -6769, -6771, -6773, -6774, -6776, -6778, + -6779, -6781, -6783, -6784, -6785, -6787, -6788, -6789, + -6790, -6791, -6792, -6793, -6794, -6795, -6795, -6796, + -6797, -6797, -6798, -6798, -6798, -6799, -6799, -6799, + -6799, -6799, -6799, -6799, -6799, -6798, -6798, -6798, + -6797, -6797, -6796, -6795, -6795, -6794, -6793, -6792, + -6791, -6790, -6789, -6788, -6786, -6785, -6784, -6782, + -6781, -6779, -6777, -6776, -6774, -6772, -6770, -6768, + -6766, -6764, -6761, -6759, -6757, -6754, -6752, -6749, + -6747, -6744, -6741, -6738, -6735, -6732, -6729, -6726, + -6723, -6719, -6716, -6713, -6709, -6706, -6702, -6698, + -6694, -6691, -6687, -6683, -6679, -6674, -6670, -6666, + -6662, -6657, -6653, -6648, -6643, -6639, -6634, -6629, + -6624, -6619, -6614, -6609, -6604, -6598, -6593, -6588, + -6582, -6576, -6571, -6565, -6559, -6553, -6547, -6541, + -6535, -6529, -6523, -6517, -6510, -6504, -6497, -6491, + -6484, -6477, -6470, -6463, -6456, -6449, -6442, -6435, + -6428, -6420, -6413, -6405, -6398, -6390, -6382, -6374, + -6367, -6359, -6351, -6343, -6334, -6326, -6318, -6309, + -6301, -6292, -6284, -6275, -6266, -6257, -6248, -6239, + -6230, -6221, -6212, -6202, -6193, -6184, -6174, -6164, + -6155, -6145, -6135, -6125, -6115, -6105, -6095, -6085, + -6074, -6064, -6054, -6043, -6032, -6022, -6011, -6000, + -5989, -5978, -5967, -5956, -5945, -5933, -5922, -5911, + -5899, -5887, -5876, -5864, -5852, -5840, -5828, -5816, + -5804, -5792, -5779, -5767, -5755, -5742, -5729, -5717, + -5704, -5691, -5678, -5665, -5652, -5639, -5626, -5612, + -5599, -5585, -5572, -5558, -5545, -5531, -5517, -5503, + -5489, -5475, -5461, -5447, -5432, -5418, -5403, -5389, + -5374, -5359, -5345, -5330, -5315, -5300, -5285, -5269, + -5254, -5239, -5223, -5208, -5192, -5177, -5161, -5145, + -5129, -5113, -5097, -5081, -5065, -5049, -5032, -5016, + -4999, -4983, -4966, -4950, 
-4933, -4916, -4899, -4882, + -4865, -4847, -4830, -4813, -4795, -4778, -4760, -4743, + -4725, -4707, -4689, -4671, -4653, -4635, -4617, -4598, + -4580, -4562, -4543, -4524, -4506, -4487, -4468, -4449, + -4430, -4411, -4392, -4373, -4354, -4334, -4315, -4295, + -4276, -4256, -4236, -4216, -4196, -4176, -4156, -4136, + -4116, -4096, -4075, -4055, -4034, -4014, -3993, -3972, + -3951, -3930, -3909, -3888, -3867, -3846, -3825, -3803, + -3782, -3760, -3739, -3717, -3695, -3673, -3651, -3629, + -3607, -3585, -3563, -3541, -3518, -3496, -3473, -3451, + -3428, -3405, -3383, -3360, -3337, -3314, -3290, -3267, + -3244, -3221, -3197, -3174, -3150, -3126, -3103, -3079, + -3055, -3031, -3007, -2983, -2959, -2934, -2910, -2886, + -2861, -2837, -2812, -2787, -2763, -2738, -2713, -2688, + -2663, -2638, -2612, -2587, -2562, -2536, -2511, -2485, + -2459, -2434, -2408, -2382, -2356, -2330, -2304, -2278, + -2251, -2225, -2199, -2172, -2146, -2119, -2092, -2066, + -2039, -2012, -1985, -1958, -1931, -1904, -1876, -1849, + -1822, -1794, -1767, -1739, -1711, -1683, -1656, -1628, + -1600, -1572, -1544, -1515, -1487, -1459, -1430, -1402, + -1373, -1345, -1316, -1287, -1258, -1230, -1201, -1172, + -1142, -1113, -1084, -1055, -1025, -996, -966, -937, + -907, -877, -848, -818, -788, -758, -728, -698, + -667, -637, -607, -576, -546, -515, -485, -454, + -423, -392, -362, -331, -300, -268, -237, -206, + -175, -143, -112, -81, -49, -17, 14, 46, + 78, 110, 142, 174, 206, 238, 270, 302, + 335, 367, 399, 432, 465, 497, 530, 563, + 596, 628, 661, 694, 728, 761, 794, 827, + 861, 894, 927, 961, 995, 1028, 1062, 1096, + 1130, 1163, 1197, 1231, 1266, 1300, 1334, 1368, + 1402, 1437, 1471, 1506, 1540, 1575, 1610, 1644, + 1679, 1714, 1749, 1784, 1819, 1854, 1889, 1925, + 1960, 1995, 2031, 2066, 2101, 2137, 2173, 2208, + 2244, 2280, 2316, 2352, 2388, 2424, 2460, 2496, + 2532, 2568, 2604, 2641, 2677, 2714, 2750, 2787, + 2823, 2860, 2897, 2934, 2970, 3007, 3044, 3081, + 3118, 3155, 3193, 3230, 3267, 3304, 3342, 
3379, + 3417, 3454, 3492, 3529, 3567, 3605, 3643, 3681, + 3718, 3756, 3794, 3832, 3870, 3909, 3947, 3985, + 4023, 4062, 4100, 4138, 4177, 4216, 4254, 4293, + 4331, 4370, 4409, 4448, 4487, 4526, 4564, 4603, + 4643, 4682, 4721, 4760, 4799, 4839, 4878, 4917, + 4957, 4996, 5036, 5075, 5115, 5154, 5194, 5234, + 5274, 5313, 5353, 5393, 5433, 5473, 5513, 5553, + 5593, 5634, 5674, 5714, 5754, 5795, 5835, 5875, + 5916, 5956, 5997, 6037, 6078, 6119, 6159, 6200, + 6241, 6282, 6323, 6364, 6404, 6445, 6486, 6527, + 6569, 6610, 6651, 6692, 6733, 6774, 6816, 6857, + 6899, 6940, 6981, 7023, 7064, 7106, 7148, 7189, + 7231, 7272, 7314, 7356, 7398, 7440, 7481, 7523, + 7565, 7607, 7649, 7691, 7733, 7775, 7818, 7860, + 7902, 7944, 7986, 8029, 8071, 8113, 8156, 8198, + 8240, 8283, 8325, 8368, 8410, 8453, 8496, 8538, + 8581, 8624, 8666, 8709, 8752, 8795, 8837, 8880, + 8923, 8966, 9009, 9052, 9095, 9138, 9181, 9224, + 9267, 9310, 9353, 9396, 9440, 9483, 9526, 9569, + 9613, 9656, 9699, 9743, 9786, 9829, 9873, 9916, + 9960, 10003, 10046, 10090, 10133, 10177, 10221, 10264, + 10308, 10351, 10395, 10439, 10482, 10526, 10570, 10614, + 10657, 10701, 10745, 10789, 10832, 10876, 10920, 10964, + 11008, 11052, 11096, 11140, 11184, 11228, 11272, 11316, + 11360, 11404, 11448, 11492, 11536, 11580, 11624, 11668, + 11712, 11756, 11800, 11844, 11889, 11933, 11977, 12021, + 12065, 12110, 12154, 12198, 12242, 12286, 12331, 12375, + 12419, 12464, 12508, 12552, 12596, 12641, 12685, 12729, + 12774, 12818, 12862, 12907, 12951, 12995, 13040, 13084, + 13129, 13173, 13217, 13262, 13306, 13351, 13395, 13439, + 13484, 13528, 13573, 13617, 13661, 13706, 13750, 13795, + 13839, 13883, 13928, 13972, 14017, 14061, 14106, 14150, + 14194, 14239, 14283, 14328, 14372, 14417, 14461, 14505, + 14550, 14594, 14639, 14683, 14727, 14772, 14816, 14861, + 14905, 14949, 14994, 15038, 15082, 15127, 15171, 15215, + 15260, 15304, 15348, 15393, 15437, 15481, 15526, 15570, + 15614, 15658, 15703, 15747, 15791, 15835, 15880, 15924, + 15968, 
16012, 16056, 16100, 16145, 16189, 16233, 16277, + 16321, 16365, 16409, 16453, 16497, 16541, 16585, 16629, + 16673, 16717, 16761, 16805, 16849, 16893, 16937, 16981, + 17025, 17069, 17113, 17156, 17200, 17244, 17288, 17332, + 17375, 17419, 17463, 17506, 17550, 17594, 17637, 17681, + 17725, 17768, 17812, 17855, 17899, 17942, 17986, 18029, + 18072, 18116, 18159, 18203, 18246, 18289, 18332, 18376, + 18419, 18462, 18505, 18549, 18592, 18635, 18678, 18721, + 18764, 18807, 18850, 18893, 18936, 18979, 19022, 19064, + 19107, 19150, 19193, 19236, 19278, 19321, 19364, 19406, + 19449, 19491, 19534, 19576, 19619, 19661, 19704, 19746, + 19788, 19831, 19873, 19915, 19957, 20000, 20042, 20084, + 20126, 20168, 20210, 20252, 20294, 20336, 20378, 20420, + 20461, 20503, 20545, 20587, 20628, 20670, 20712, 20753, + 20795, 20836, 20878, 20919, 20960, 21002, 21043, 21084, + 21125, 21167, 21208, 21249, 21290, 21331, 21372, 21413, + 21454, 21495, 21535, 21576, 21617, 21658, 21698, 21739, + 21779, 21820, 21860, 21901, 21941, 21982, 22022, 22062, + 22102, 22142, 22183, 22223, 22263, 22303, 22343, 22383, + 22422, 22462, 22502, 22542, 22581, 22621, 22660, 22700, + 22739, 22779, 22818, 22858, 22897, 22936, 22975, 23014, + 23053, 23092, 23131, 23170, 23209, 23248, 23287, 23326, + 23364, 23403, 23441, 23480, 23518, 23557, 23595, 23633, + 23672, 23710, 23748, 23786, 23824, 23862, 23900, 23938, + 23976, 24013, 24051, 24089, 24126, 24164, 24201, 24239, + 24276, 24313, 24351, 24388, 24425, 24462, 24499, 24536, + 24573, 24610, 24647, 24683, 24720, 24756, 24793, 24830, + 24866, 24902, 24939, 24975, 25011, 25047, 25083, 25119, + 25155, 25191, 25227, 25262, 25298, 25334, 25369, 25405, + 25440, 25476, 25511, 25546, 25581, 25616, 25651, 25686, + 25721, 25756, 25791, 25826, 25860, 25895, 25929, 25964, + 25998, 26033, 26067, 26101, 26135, 26169, 26203, 26237, + 26271, 26305, 26338, 26372, 26405, 26439, 26472, 26506, + 26539, 26572, 26605, 26638, 26671, 26704, 26737, 26770, + 26803, 26835, 26868, 26900, 26933, 
26965, 26997, 27030, + 27062, 27094, 27126, 27158, 27189, 27221, 27253, 27285, + 27316, 27348, 27379, 27410, 27441, 27473, 27504, 27535, + 27566, 27597, 27627, 27658, 27689, 27719, 27750, 27780, + 27810, 27841, 27871, 27901, 27931, 27961, 27991, 28020, + 28050, 28080, 28109, 28139, 28168, 28197, 28227, 28256, + 28285, 28314, 28343, 28371, 28400, 28429, 28457, 28486, + 28514, 28543, 28571, 28599, 28627, 28655, 28683, 28711, + 28738, 28766, 28794, 28821, 28849, 28876, 28903, 28930, + 28957, 28984, 29011, 29038, 29065, 29092, 29118, 29145, + 29171, 29197, 29223, 29250, 29276, 29302, 29327, 29353, + 29379, 29405, 29430, 29456, 29481, 29506, 29531, 29556, + 29581, 29606, 29631, 29656, 29681, 29705, 29730, 29754, + 29778, 29802, 29827, 29851, 29874, 29898, 29922, 29946, + 29969, 29993, 30016, 30039, 30063, 30086, 30109, 30132, + 30155, 30177, 30200, 30223, 30245, 30267, 30290, 30312, + 30334, 30356, 30378, 30400, 30422, 30443, 30465, 30486, + 30508, 30529, 30550, 30571, 30592, 30613, 30634, 30654, + 30675, 30696, 30716, 30736, 30756, 30777, 30797, 30817, + 30836, 30856, 30876, 30895, 30915, 30934, 30953, 30973, + 30992, 31011, 31030, 31048, 31067, 31086, 31104, 31123, + 31141, 31159, 31177, 31195, 31213, 31231, 31249, 31266, + 31284, 31301, 31318, 31336, 31353, 31370, 31387, 31404, + 31420, 31437, 31453, 31470, 31486, 31502, 31519, 31535, + 31551, 31566, 31582, 31598, 31613, 31629, 31644, 31659, + 31674, 31689, 31704, 31719, 31734, 31749, 31763, 31777, + 31792, 31806, 31820, 31834, 31848, 31862, 31876, 31889, + 31903, 31916, 31929, 31943, 31956, 31969, 31982, 31994, + 32007, 32020, 32032, 32045, 32057, 32069, 32081, 32093, + 32105, 32117, 32128, 32140, 32151, 32163, 32174, 32185, + 32196, 32207, 32218, 32229, 32239, 32250, 32260, 32270, + 32281, 32291, 32301, 32311, 32320, 32330, 32340, 32349, + 32358, 32368, 32377, 32386, 32395, 32404, 32412, 32421, + 32430, 32438, 32446, 32455, 32463, 32471, 32479, 32486, + 32494, 32502, 32509, 32516, 32524, 32531, 32538, 32545, + 
32552, 32558, 32565, 32572, 32578, 32584, 32590, 32597, + 32603, 32608, 32614, 32620, 32625, 32631, 32636, 32642, + 32647, 32652, 32657, 32661, 32666, 32671, 32675, 32680, + 32684, 32688, 32692, 32696, 32700, 32704, 32707, 32711, + 32714, 32718, 32721, 32724, 32727, 32730, 32733, 32735, + 32738, 32740, 32743, 32745, 32747, 32749, 32751, 32753, + 32755, 32756, 32758, 32759, 32760, 32762, 32763, 32764, + 32764, 32765, 32766, 32766, 32767, 32767, 32767, 32767, + 32767, 32767, 32767, 32767, 32766, 32766, 32765, 32764, + 32764, 32763, 32762, 32760, 32759, 32758, 32756, 32755, + 32753, 32751, 32749, 32747, 32745, 32743, 32740, 32738, + 32735, 32733, 32730, 32727, 32724, 32721, 32718, 32714, + 32711, 32707, 32704, 32700, 32696, 32692, 32688, 32684, + 32680, 32675, 32671, 32666, 32661, 32657, 32652, 32647, + 32642, 32636, 32631, 32625, 32620, 32614, 32608, 32603, + 32597, 32590, 32584, 32578, 32572, 32565, 32558, 32552, + 32545, 32538, 32531, 32524, 32516, 32509, 32502, 32494, + 32486, 32479, 32471, 32463, 32455, 32446, 32438, 32430, + 32421, 32412, 32404, 32395, 32386, 32377, 32368, 32358, + 32349, 32340, 32330, 32320, 32311, 32301, 32291, 32281, + 32270, 32260, 32250, 32239, 32229, 32218, 32207, 32196, + 32185, 32174, 32163, 32151, 32140, 32128, 32117, 32105, + 32093, 32081, 32069, 32057, 32045, 32032, 32020, 32007, + 31994, 31982, 31969, 31956, 31943, 31929, 31916, 31903, + 31889, 31876, 31862, 31848, 31834, 31820, 31806, 31792, + 31777, 31763, 31749, 31734, 31719, 31704, 31689, 31674, + 31659, 31644, 31629, 31613, 31598, 31582, 31566, 31551, + 31535, 31519, 31502, 31486, 31470, 31453, 31437, 31420, + 31404, 31387, 31370, 31353, 31336, 31318, 31301, 31284, + 31266, 31249, 31231, 31213, 31195, 31177, 31159, 31141, + 31123, 31104, 31086, 31067, 31048, 31030, 31011, 30992, + 30973, 30953, 30934, 30915, 30895, 30876, 30856, 30836, + 30817, 30797, 30777, 30756, 30736, 30716, 30696, 30675, + 30654, 30634, 30613, 30592, 30571, 30550, 30529, 30508, + 30486, 30465, 30443, 30422, 
30400, 30378, 30356, 30334, + 30312, 30290, 30267, 30245, 30223, 30200, 30177, 30155, + 30132, 30109, 30086, 30063, 30039, 30016, 29993, 29969, + 29946, 29922, 29898, 29874, 29851, 29827, 29802, 29778, + 29754, 29730, 29705, 29681, 29656, 29631, 29606, 29581, + 29556, 29531, 29506, 29481, 29456, 29430, 29405, 29379, + 29353, 29327, 29302, 29276, 29250, 29223, 29197, 29171, + 29145, 29118, 29092, 29065, 29038, 29011, 28984, 28957, + 28930, 28903, 28876, 28849, 28821, 28794, 28766, 28738, + 28711, 28683, 28655, 28627, 28599, 28571, 28543, 28514, + 28486, 28457, 28429, 28400, 28371, 28343, 28314, 28285, + 28256, 28227, 28197, 28168, 28139, 28109, 28080, 28050, + 28020, 27991, 27961, 27931, 27901, 27871, 27841, 27810, + 27780, 27750, 27719, 27689, 27658, 27627, 27597, 27566, + 27535, 27504, 27473, 27441, 27410, 27379, 27348, 27316, + 27285, 27253, 27221, 27189, 27158, 27126, 27094, 27062, + 27030, 26997, 26965, 26933, 26900, 26868, 26835, 26803, + 26770, 26737, 26704, 26671, 26638, 26605, 26572, 26539, + 26506, 26472, 26439, 26405, 26372, 26338, 26305, 26271, + 26237, 26203, 26169, 26135, 26101, 26067, 26033, 25998, + 25964, 25929, 25895, 25860, 25826, 25791, 25756, 25721, + 25686, 25651, 25616, 25581, 25546, 25511, 25476, 25440, + 25405, 25369, 25334, 25298, 25262, 25227, 25191, 25155, + 25119, 25083, 25047, 25011, 24975, 24939, 24902, 24866, + 24830, 24793, 24756, 24720, 24683, 24647, 24610, 24573, + 24536, 24499, 24462, 24425, 24388, 24351, 24313, 24276, + 24239, 24201, 24164, 24126, 24089, 24051, 24013, 23976, + 23938, 23900, 23862, 23824, 23786, 23748, 23710, 23672, + 23633, 23595, 23557, 23518, 23480, 23441, 23403, 23364, + 23326, 23287, 23248, 23209, 23170, 23131, 23092, 23053, + 23014, 22975, 22936, 22897, 22858, 22818, 22779, 22739, + 22700, 22660, 22621, 22581, 22542, 22502, 22462, 22422, + 22383, 22343, 22303, 22263, 22223, 22183, 22142, 22102, + 22062, 22022, 21982, 21941, 21901, 21860, 21820, 21779, + 21739, 21698, 21658, 21617, 21576, 21535, 21495, 21454, 
+ 21413, 21372, 21331, 21290, 21249, 21208, 21167, 21125, + 21084, 21043, 21002, 20960, 20919, 20878, 20836, 20795, + 20753, 20712, 20670, 20628, 20587, 20545, 20503, 20461, + 20420, 20378, 20336, 20294, 20252, 20210, 20168, 20126, + 20084, 20042, 20000, 19957, 19915, 19873, 19831, 19788, + 19746, 19704, 19661, 19619, 19576, 19534, 19491, 19449, + 19406, 19364, 19321, 19278, 19236, 19193, 19150, 19107, + 19064, 19022, 18979, 18936, 18893, 18850, 18807, 18764, + 18721, 18678, 18635, 18592, 18549, 18505, 18462, 18419, + 18376, 18332, 18289, 18246, 18203, 18159, 18116, 18072, + 18029, 17986, 17942, 17899, 17855, 17812, 17768, 17725, + 17681, 17637, 17594, 17550, 17506, 17463, 17419, 17375, + 17332, 17288, 17244, 17200, 17156, 17113, 17069, 17025, + 16981, 16937, 16893, 16849, 16805, 16761, 16717, 16673, + 16629, 16585, 16541, 16497, 16453, 16409, 16365, 16321, + 16277, 16233, 16189, 16145, 16100, 16056, 16012, 15968, + 15924, 15880, 15835, 15791, 15747, 15703, 15658, 15614, + 15570, 15526, 15481, 15437, 15393, 15348, 15304, 15260, + 15215, 15171, 15127, 15082, 15038, 14994, 14949, 14905, + 14861, 14816, 14772, 14727, 14683, 14639, 14594, 14550, + 14505, 14461, 14417, 14372, 14328, 14283, 14239, 14194, + 14150, 14106, 14061, 14017, 13972, 13928, 13883, 13839, + 13795, 13750, 13706, 13661, 13617, 13573, 13528, 13484, + 13439, 13395, 13351, 13306, 13262, 13217, 13173, 13129, + 13084, 13040, 12995, 12951, 12907, 12862, 12818, 12774, + 12729, 12685, 12641, 12596, 12552, 12508, 12464, 12419, + 12375, 12331, 12286, 12242, 12198, 12154, 12110, 12065, + 12021, 11977, 11933, 11889, 11844, 11800, 11756, 11712, + 11668, 11624, 11580, 11536, 11492, 11448, 11404, 11360, + 11316, 11272, 11228, 11184, 11140, 11096, 11052, 11008, + 10964, 10920, 10876, 10832, 10789, 10745, 10701, 10657, + 10614, 10570, 10526, 10482, 10439, 10395, 10351, 10308, + 10264, 10221, 10177, 10133, 10090, 10046, 10003, 9960, + 9916, 9873, 9829, 9786, 9743, 9699, 9656, 9613, + 9569, 9526, 9483, 9440, 9396, 
9353, 9310, 9267, + 9224, 9181, 9138, 9095, 9052, 9009, 8966, 8923, + 8880, 8837, 8795, 8752, 8709, 8666, 8624, 8581, + 8538, 8496, 8453, 8410, 8368, 8325, 8283, 8240, + 8198, 8156, 8113, 8071, 8029, 7986, 7944, 7902, + 7860, 7818, 7775, 7733, 7691, 7649, 7607, 7565, + 7523, 7481, 7440, 7398, 7356, 7314, 7272, 7231, + 7189, 7148, 7106, 7064, 7023, 6981, 6940, 6899, + 6857, 6816, 6774, 6733, 6692, 6651, 6610, 6569, + 6527, 6486, 6445, 6404, 6364, 6323, 6282, 6241, + 6200, 6159, 6119, 6078, 6037, 5997, 5956, 5916, + 5875, 5835, 5795, 5754, 5714, 5674, 5634, 5593, + 5553, 5513, 5473, 5433, 5393, 5353, 5313, 5274, + 5234, 5194, 5154, 5115, 5075, 5036, 4996, 4957, + 4917, 4878, 4839, 4799, 4760, 4721, 4682, 4643, + 4603, 4564, 4526, 4487, 4448, 4409, 4370, 4331, + 4293, 4254, 4216, 4177, 4138, 4100, 4062, 4023, + 3985, 3947, 3909, 3870, 3832, 3794, 3756, 3718, + 3681, 3643, 3605, 3567, 3529, 3492, 3454, 3417, + 3379, 3342, 3304, 3267, 3230, 3193, 3155, 3118, + 3081, 3044, 3007, 2970, 2934, 2897, 2860, 2823, + 2787, 2750, 2714, 2677, 2641, 2604, 2568, 2532, + 2496, 2460, 2424, 2388, 2352, 2316, 2280, 2244, + 2208, 2173, 2137, 2101, 2066, 2031, 1995, 1960, + 1925, 1889, 1854, 1819, 1784, 1749, 1714, 1679, + 1644, 1610, 1575, 1540, 1506, 1471, 1437, 1402, + 1368, 1334, 1300, 1266, 1231, 1197, 1163, 1130, + 1096, 1062, 1028, 995, 961, 927, 894, 861, + 827, 794, 761, 728, 694, 661, 628, 596, + 563, 530, 497, 465, 432, 399, 367, 335, + 302, 270, 238, 206, 174, 142, 110, 78, + 46, 14, -17, -49, -81, -112, -143, -175, + -206, -237, -268, -300, -331, -362, -392, -423, + -454, -485, -515, -546, -576, -607, -637, -667, + -698, -728, -758, -788, -818, -848, -877, -907, + -937, -966, -996, -1025, -1055, -1084, -1113, -1142, + -1172, -1201, -1230, -1258, -1287, -1316, -1345, -1373, + -1402, -1430, -1459, -1487, -1515, -1544, -1572, -1600, + -1628, -1656, -1683, -1711, -1739, -1767, -1794, -1822, + -1849, -1876, -1904, -1931, -1958, -1985, -2012, -2039, + -2066, -2092, -2119, -2146, 
-2172, -2199, -2225, -2251, + -2278, -2304, -2330, -2356, -2382, -2408, -2434, -2459, + -2485, -2511, -2536, -2562, -2587, -2612, -2638, -2663, + -2688, -2713, -2738, -2763, -2787, -2812, -2837, -2861, + -2886, -2910, -2934, -2959, -2983, -3007, -3031, -3055, + -3079, -3103, -3126, -3150, -3174, -3197, -3221, -3244, + -3267, -3290, -3314, -3337, -3360, -3383, -3405, -3428, + -3451, -3473, -3496, -3518, -3541, -3563, -3585, -3607, + -3629, -3651, -3673, -3695, -3717, -3739, -3760, -3782, + -3803, -3825, -3846, -3867, -3888, -3909, -3930, -3951, + -3972, -3993, -4014, -4034, -4055, -4075, -4096, -4116, + -4136, -4156, -4176, -4196, -4216, -4236, -4256, -4276, + -4295, -4315, -4334, -4354, -4373, -4392, -4411, -4430, + -4449, -4468, -4487, -4506, -4524, -4543, -4562, -4580, + -4598, -4617, -4635, -4653, -4671, -4689, -4707, -4725, + -4743, -4760, -4778, -4795, -4813, -4830, -4847, -4865, + -4882, -4899, -4916, -4933, -4950, -4966, -4983, -4999, + -5016, -5032, -5049, -5065, -5081, -5097, -5113, -5129, + -5145, -5161, -5177, -5192, -5208, -5223, -5239, -5254, + -5269, -5285, -5300, -5315, -5330, -5345, -5359, -5374, + -5389, -5403, -5418, -5432, -5447, -5461, -5475, -5489, + -5503, -5517, -5531, -5545, -5558, -5572, -5585, -5599, + -5612, -5626, -5639, -5652, -5665, -5678, -5691, -5704, + -5717, -5729, -5742, -5755, -5767, -5779, -5792, -5804, + -5816, -5828, -5840, -5852, -5864, -5876, -5887, -5899, + -5911, -5922, -5933, -5945, -5956, -5967, -5978, -5989, + -6000, -6011, -6022, -6032, -6043, -6054, -6064, -6074, + -6085, -6095, -6105, -6115, -6125, -6135, -6145, -6155, + -6164, -6174, -6184, -6193, -6202, -6212, -6221, -6230, + -6239, -6248, -6257, -6266, -6275, -6284, -6292, -6301, + -6309, -6318, -6326, -6334, -6343, -6351, -6359, -6367, + -6374, -6382, -6390, -6398, -6405, -6413, -6420, -6428, + -6435, -6442, -6449, -6456, -6463, -6470, -6477, -6484, + -6491, -6497, -6504, -6510, -6517, -6523, -6529, -6535, + -6541, -6547, -6553, -6559, -6565, -6571, -6576, -6582, 
+ -6588, -6593, -6598, -6604, -6609, -6614, -6619, -6624, + -6629, -6634, -6639, -6643, -6648, -6653, -6657, -6662, + -6666, -6670, -6674, -6679, -6683, -6687, -6691, -6694, + -6698, -6702, -6706, -6709, -6713, -6716, -6719, -6723, + -6726, -6729, -6732, -6735, -6738, -6741, -6744, -6747, + -6749, -6752, -6754, -6757, -6759, -6761, -6764, -6766, + -6768, -6770, -6772, -6774, -6776, -6777, -6779, -6781, + -6782, -6784, -6785, -6786, -6788, -6789, -6790, -6791, + -6792, -6793, -6794, -6795, -6795, -6796, -6797, -6797, + -6798, -6798, -6798, -6799, -6799, -6799, -6799, -6799, + -6799, -6799, -6799, -6798, -6798, -6798, -6797, -6797, + -6796, -6795, -6795, -6794, -6793, -6792, -6791, -6790, + -6789, -6788, -6787, -6785, -6784, -6783, -6781, -6779, + -6778, -6776, -6774, -6773, -6771, -6769, -6767, -6765, + -6763, -6760, -6758, -6756, -6753, -6751, -6748, -6746, + -6743, -6741, -6738, -6735, -6732, -6729, -6726, -6723, + -6720, -6717, -6714, -6710, -6707, -6703, -6700, -6696, + -6693, -6689, -6685, -6682, -6678, -6674, -6670, -6666, + -6662, -6658, -6654, -6649, -6645, -6641, -6636, -6632, + -6627, -6623, -6618, -6613, -6608, -6604, -6599, -6594, + -6589, -6584, -6579, -6573, -6568, -6563, -6558, -6552, + -6547, -6541, -6536, -6530, -6524, -6519, -6513, -6507, + -6501, -6495, -6489, -6483, -6477, -6471, -6464, -6458, + -6452, -6445, -6439, -6433, -6426, -6419, -6413, -6406, + -6399, -6392, -6386, -6379, -6372, -6365, -6358, -6350, + -6343, -6336, -6329, -6321, -6314, -6307, -6299, -6292, + -6284, -6276, -6269, -6261, -6253, -6245, -6238, -6230, + -6222, -6214, -6205, -6197, -6189, -6181, -6173, -6164, + -6156, -6148, -6139, -6131, -6122, -6113, -6105, -6096, + -6087, -6078, -6070, -6061, -6052, -6043, -6034, -6025, + -6016, -6006, -5997, -5988, -5979, -5969, -5960, -5950, + -5941, -5931, -5922, -5912, -5903, -5893, -5883, -5873, + -5863, -5854, -5844, -5834, -5824, -5814, -5804, -5793, + -5783, -5773, -5763, -5752, -5742, -5732, -5721, -5711, + -5700, -5690, -5679, 
-5668, -5658, -5647, -5636, -5626, + -5615, -5604, -5593, -5582, -5571, -5560, -5549, -5538, + -5527, -5515, -5504, -5493, -5482, -5470, -5459, -5447, + -5436, -5424, -5413, -5401, -5390, -5378, -5366, -5355, + -5343, -5331, -5319, -5308, -5296, -5284, -5272, -5260, + -5248, -5236, -5224, -5211, -5199, -5187, -5175, -5162, + -5150, -5138, -5125, -5113, -5101, -5088, -5076, -5063, + -5051, -5038, -5025, -5013, -5000, -4987, -4974, -4962, + -4949, -4936, -4923, -4910, -4897, -4884, -4871, -4858, + -4845, -4832, -4819, -4806, -4793, -4779, -4766, -4753, + -4740, -4726, -4713, -4699, -4686, -4673, -4659, -4646, + -4632, -4619, -4605, -4591, -4578, -4564, -4550, -4537, + -4523, -4509, -4495, -4482, -4468, -4454, -4440, -4426, + -4412, -4398, -4384, -4370, -4356, -4342, -4328, -4314, + -4300, -4286, -4271, -4257, -4243, -4229, -4215, -4200, + -4186, -4172, -4157, -4143, -4128, -4114, -4100, -4085, + -4071, -4056, -4042, -4027, -4012, -3998, -3983, -3969, + -3954, -3939, -3925, -3910, -3895, -3881, -3866, -3851, + -3836, -3821, -3807, -3792, -3777, -3762, -3747, -3732, + -3717, -3702, -3687, -3672, -3657, -3642, -3627, -3612, + -3597, -3582, -3567, -3552, -3537, -3521, -3506, -3491, + -3476, -3461, -3446, -3430, -3415, -3400, -3385, -3369, + -3354, -3339, -3323, -3308, -3293, -3277, -3262, -3246, + -3231, -3216, -3200, -3185, -3169, -3154, -3138, -3123, + -3107, -3092, -3076, -3061, -3045, -3030, -3014, -2999, + -2983, -2968, -2952, -2936, -2921, -2905, -2890, -2874, + -2858, -2843, -2827, -2811, -2796, -2780, -2764, -2749, + -2733, -2717, -2701, -2686, -2670, -2654, -2639, -2623, + -2607, -2591, -2576, -2560, -2544, -2528, -2512, -2497, + -2481, -2465, -2449, -2433, -2418, -2402, -2386, -2370, + -2354, -2339, -2323, -2307, -2291, -2275, -2259, -2244, + -2228, -2212, -2196, -2180, -2164, -2148, -2133, -2117, + -2101, -2085, -2069, -2053, -2037, -2022, -2006, -1990, + -1974, -1958, -1942, -1926, -1911, -1895, -1879, -1863, + -1847, -1831, -1815, -1800, -1784, -1768, -1752, 
-1736, + -1720, -1705, -1689, -1673, -1657, -1641, -1625, -1610, + -1594, -1578, -1562, -1546, -1531, -1515, -1499, -1483, + -1467, -1452, -1436, -1420, -1404, -1389, -1373, -1357, + -1341, -1326, -1310, -1294, -1278, -1263, -1247, -1231, + -1216, -1200, -1184, -1169, -1153, -1137, -1122, -1106, + -1091, -1075, -1059, -1044, -1028, -1013, -997, -981, + -966, -950, -935, -919, -904, -888, -873, -857, + -842, -826, -811, -795, -780, -765, -749, -734, + -718, -703, -688, -672, -657, -642, -626, -611, + -596, -580, -565, -550, -535, -519, -504, -489, + -474, -459, -443, -428, -413, -398, -383, -368, + -353, -338, -323, -308, -293, -278, -263, -248, + -233, -218, -203, -188, -173, -158, -143, -128, + -114, -99, -84, -69, -54, -40, -25, -10, + 5, 19, 34, 49, 63, 78, 92, 107, + 122, 136, 151, 165, 180, 194, 209, 223, + 237, 252, 266, 280, 295, 309, 323, 338, + 352, 366, 380, 395, 409, 423, 437, 451, + 465, 479, 493, 507, 521, 535, 549, 563, + 577, 591, 605, 619, 633, 647, 660, 674, + 688, 702, 715, 729, 743, 756, 770, 783, + 797, 811, 824, 838, 851, 864, 878, 891, + 905, 918, 931, 945, 958, 971, 984, 998, + 1011, 1024, 1037, 1050, 1063, 1076, 1089, 1102, + 1115, 1128, 1141, 1154, 1167, 1180, 1193, 1205, + 1218, 1231, 1244, 1256, 1269, 1282, 1294, 1307, + 1319, 1332, 1344, 1357, 1369, 1382, 1394, 1407, + 1419, 1431, 1443, 1456, 1468, 1480, 1492, 1504, + 1517, 1529, 1541, 1553, 1565, 1577, 1589, 1601, + 1612, 1624, 1636, 1648, 1660, 1671, 1683, 1695, + 1707, 1718, 1730, 1741, 1753, 1764, 1776, 1787, + 1799, 1810, 1821, 1833, 1844, 1855, 1867, 1878, + 1889, 1900, 1911, 1922, 1933, 1944, 1955, 1966, + 1977, 1988, 1999, 2010, 2021, 2031, 2042, 2053, + 2063, 2074, 2085, 2095, 2106, 2116, 2127, 2137, + 2148, 2158, 2168, 2179, 2189, 2199, 2209, 2220, + 2230, 2240, 2250, 2260, 2270, 2280, 2290, 2300, + 2310, 2320, 2330, 2339, 2349, 2359, 2369, 2378, + 2388, 2397, 2407, 2417, 2426, 2435, 2445, 2454, + 2464, 2473, 2482, 2491, 2501, 2510, 2519, 2528, + 2537, 2546, 2555, 2564, 2573, 
2582, 2591, 2600, + 2609, 2617, 2626, 2635, 2644, 2652, 2661, 2669, + 2678, 2686, 2695, 2703, 2712, 2720, 2728, 2737, + 2745, 2753, 2761, 2769, 2777, 2785, 2793, 2801, + 2809, 2817, 2825, 2833, 2841, 2849, 2856, 2864, + 2872, 2879, 2887, 2895, 2902, 2910, 2917, 2925, + 2932, 2939, 2947, 2954, 2961, 2968, 2975, 2983, + 2990, 2997, 3004, 3011, 3018, 3025, 3031, 3038, + 3045, 3052, 3059, 3065, 3072, 3079, 3085, 3092, + 3098, 3105, 3111, 3117, 3124, 3130, 3136, 3143, + 3149, 3155, 3161, 3167, 3173, 3179, 3185, 3191, + 3197, 3203, 3209, 3215, 3221, 3226, 3232, 3238, + 3243, 3249, 3254, 3260, 3265, 3271, 3276, 3282, + 3287, 3292, 3297, 3303, 3308, 3313, 3318, 3323, + 3328, 3333, 3338, 3343, 3348, 3353, 3357, 3362, + 3367, 3372, 3376, 3381, 3385, 3390, 3395, 3399, + 3403, 3408, 3412, 3416, 3421, 3425, 3429, 3433, + 3437, 3442, 3446, 3450, 3454, 3458, 3461, 3465, + 3469, 3473, 3477, 3480, 3484, 3488, 3491, 3495, + 3498, 3502, 3505, 3509, 3512, 3515, 3519, 3522, + 3525, 3528, 3532, 3535, 3538, 3541, 3544, 3547, + 3550, 3553, 3556, 3558, 3561, 3564, 3567, 3569, + 3572, 3575, 3577, 3580, 3582, 3585, 3587, 3589, + 3592, 3594, 3596, 3598, 3601, 3603, 3605, 3607, + 3609, 3611, 3613, 3615, 3617, 3619, 3621, 3622, + 3624, 3626, 3627, 3629, 3631, 3632, 3634, 3635, + 3637, 3638, 3640, 3641, 3642, 3644, 3645, 3646, + 3647, 3648, 3649, 3650, 3651, 3652, 3653, 3654, + 3655, 3656, 3657, 3658, 3658, 3659, 3660, 3660, + 3661, 3662, 3662, 3663, 3663, 3663, 3664, 3664, + 3664, 3665, 3665, 3665, 3665, 3665, 3666, 3666, + 3666, 3666, 3666, 3665, 3665, 3665, 3665, 3665, + 3665, 3664, 3664, 3664, 3663, 3663, 3662, 3662, + 3661, 3661, 3660, 3659, 3659, 3658, 3657, 3657, + 3656, 3655, 3654, 3653, 3652, 3651, 3650, 3649, + 3648, 3647, 3646, 3645, 3643, 3642, 3641, 3640, + 3638, 3637, 3635, 3634, 3633, 3631, 3630, 3628, + 3626, 3625, 3623, 3621, 3620, 3618, 3616, 3614, + 3612, 3610, 3608, 3606, 3604, 3602, 3600, 3598, + 3596, 3594, 3592, 3589, 3587, 3585, 3583, 3580, + 3578, 3575, 3573, 3570, 3568, 
3565, 3563, 3560, + 3557, 3555, 3552, 3549, 3547, 3544, 3541, 3538, + 3535, 3532, 3529, 3526, 3523, 3520, 3517, 3514, + 3511, 3508, 3505, 3501, 3498, 3495, 3491, 3488, + 3485, 3481, 3478, 3474, 3471, 3467, 3464, 3460, + 3457, 3453, 3449, 3446, 3442, 3438, 3434, 3430, + 3427, 3423, 3419, 3415, 3411, 3407, 3403, 3399, + 3395, 3391, 3387, 3382, 3378, 3374, 3370, 3365, + 3361, 3357, 3352, 3348, 3344, 3339, 3335, 3330, + 3326, 3321, 3317, 3312, 3307, 3303, 3298, 3293, + 3289, 3284, 3279, 3274, 3269, 3264, 3260, 3255, + 3250, 3245, 3240, 3235, 3230, 3224, 3219, 3214, + 3209, 3204, 3199, 3193, 3188, 3183, 3178, 3172, + 3167, 3162, 3156, 3151, 3145, 3140, 3134, 3129, + 3123, 3118, 3112, 3106, 3101, 3095, 3089, 3084, + 3078, 3072, 3066, 3060, 3055, 3049, 3043, 3037, + 3031, 3025, 3019, 3013, 3007, 3001, 2995, 2989, + 2983, 2977, 2970, 2964, 2958, 2952, 2945, 2939, + 2933, 2927, 2920, 2914, 2908, 2901, 2895, 2888, + 2882, 2875, 2869, 2862, 2856, 2849, 2843, 2836, + 2829, 2823, 2816, 2809, 2803, 2796, 2789, 2782, + 2776, 2769, 2762, 2755, 2748, 2741, 2734, 2727, + 2720, 2714, 2707, 2700, 2692, 2685, 2678, 2671, + 2664, 2657, 2650, 2643, 2636, 2628, 2621, 2614, + 2607, 2599, 2592, 2585, 2578, 2570, 2563, 2556, + 2548, 2541, 2533, 2526, 2518, 2511, 2504, 2496, + 2489, 2481, 2473, 2466, 2458, 2451, 2443, 2435, + 2428, 2420, 2412, 2405, 2397, 2389, 2382, 2374, + 2366, 2358, 2351, 2343, 2335, 2327, 2319, 2311, + 2303, 2296, 2288, 2280, 2272, 2264, 2256, 2248, + 2240, 2232, 2224, 2216, 2208, 2200, 2192, 2183, + 2175, 2167, 2159, 2151, 2143, 2135, 2127, 2118, + 2110, 2102, 2094, 2085, 2077, 2069, 2061, 2052, + 2044, 2036, 2027, 2019, 2011, 2002, 1994, 1986, + 1977, 1969, 1961, 1952, 1944, 1935, 1927, 1918, + 1910, 1901, 1893, 1884, 1876, 1867, 1859, 1850, + 1842, 1833, 1825, 1816, 1808, 1799, 1790, 1782, + 1773, 1765, 1756, 1747, 1739, 1730, 1721, 1713, + 1704, 1695, 1687, 1678, 1669, 1660, 1652, 1643, + 1634, 1625, 1617, 1608, 1599, 1590, 1582, 1573, + 1564, 1555, 1546, 1538, 1529, 
1520, 1511, 1502, + 1493, 1485, 1476, 1467, 1458, 1449, 1440, 1431, + 1423, 1414, 1405, 1396, 1387, 1378, 1369, 1360, + 1351, 1342, 1333, 1324, 1316, 1307, 1298, 1289, + 1280, 1271, 1262, 1253, 1244, 1235, 1226, 1217, + 1208, 1199, 1190, 1181, 1172, 1163, 1154, 1145, + 1136, 1127, 1118, 1109, 1100, 1091, 1082, 1073, + 1064, 1055, 1046, 1037, 1028, 1019, 1010, 1001, + 992, 983, 974, 965, 956, 947, 938, 929, + 920, 910, 901, 892, 883, 874, 865, 856, + 847, 838, 829, 820, 811, 802, 793, 784, + 775, 766, 757, 748, 739, 730, 721, 712, + 703, 694, 685, 676, 667, 658, 649, 640, + 631, 622, 613, 604, 595, 586, 577, 568, + 559, 550, 541, 532, 523, 514, 505, 496, + 488, 479, 470, 461, 452, 443, 434, 425, + 416, 407, 398, 389, 381, 372, 363, 354, + 345, 336, 327, 318, 310, 301, 292, 283, + 274, 265, 257, 248, 239, 230, 221, 213, + 204, 195, 186, 178, 169, 160, 151, 143, + 134, 125, 116, 108, 99, 90, 82, 73, + 64, 56, 47, 38, 30, 21, 12, 4, + -5, -13, -22, -30, -39, -48, -56, -65, + -73, -82, -90, -99, -107, -116, -124, -133, + -141, -150, -158, -167, -175, -183, -192, -200, + -209, -217, -225, -234, -242, -250, -259, -267, + -275, -284, -292, -300, -309, -317, -325, -333, + -342, -350, -358, -366, -374, -382, -391, -399, + -407, -415, -423, -431, -439, -447, -456, -464, + -472, -480, -488, -496, -504, -512, -520, -528, + -536, -543, -551, -559, -567, -575, -583, -591, + -599, -606, -614, -622, -630, -638, -645, -653, + -661, -669, -676, -684, -692, -699, -707, -715, + -722, -730, -738, -745, -753, -760, -768, -775, + -783, -790, -798, -805, -813, -820, -828, -835, + -842, -850, -857, -865, -872, -879, -887, -894, + -901, -908, -916, -923, -930, -937, -945, -952, + -959, -966, -973, -980, -987, -994, -1001, -1009, + -1016, -1023, -1030, -1037, -1044, -1050, -1057, -1064, + -1071, -1078, -1085, -1092, -1099, -1105, -1112, -1119, + -1126, -1133, -1139, -1146, -1153, -1159, -1166, -1173, + -1179, -1186, -1193, -1199, -1206, -1212, -1219, -1225, + -1232, -1238, -1245, -1251, 
-1258, -1264, -1270, -1277, + -1283, -1289, -1296, -1302, -1308, -1315, -1321, -1327, + -1333, -1339, -1346, -1352, -1358, -1364, -1370, -1376, + -1382, -1388, -1394, -1400, -1406, -1412, -1418, -1424, + -1430, -1436, -1442, -1448, -1454, -1459, -1465, -1471, + -1477, -1482, -1488, -1494, -1500, -1505, -1511, -1516, + -1522, -1528, -1533, -1539, -1544, -1550, -1555, -1561, + -1566, -1572, -1577, -1582, -1588, -1593, -1599, -1604, + -1609, -1614, -1620, -1625, -1630, -1635, -1640, -1646, + -1651, -1656, -1661, -1666, -1671, -1676, -1681, -1686, + -1691, -1696, -1701, -1706, -1711, -1716, -1720, -1725, + -1730, -1735, -1740, -1744, -1749, -1754, -1758, -1763, + -1768, -1772, -1777, -1782, -1786, -1791, -1795, -1800, + -1804, -1809, -1813, -1817, -1822, -1826, -1830, -1835, + -1839, -1843, -1848, -1852, -1856, -1860, -1864, -1869, + -1873, -1877, -1881, -1885, -1889, -1893, -1897, -1901, + -1905, -1909, -1913, -1917, -1921, -1925, -1928, -1932, + -1936, -1940, -1944, -1947, -1951, -1955, -1958, -1962, + -1966, -1969, -1973, -1976, -1980, -1983, -1987, -1990, + -1994, -1997, -2001, -2004, -2007, -2011, -2014, -2017, + -2021, -2024, -2027, -2030, -2033, -2037, -2040, -2043, + -2046, -2049, -2052, -2055, -2058, -2061, -2064, -2067, + -2070, -2073, -2076, -2079, -2081, -2084, -2087, -2090, + -2092, -2095, -2098, -2101, -2103, -2106, -2108, -2111, + -2114, -2116, -2119, -2121, -2124, -2126, -2129, -2131, + -2133, -2136, -2138, -2140, -2143, -2145, -2147, -2150, + -2152, -2154, -2156, -2158, -2160, -2162, -2165, -2167, + -2169, -2171, -2173, -2175, -2177, -2179, -2180, -2182, + -2184, -2186, -2188, -2190, -2191, -2193, -2195, -2197, + -2198, -2200, -2202, -2203, -2205, -2206, -2208, -2209, + -2211, -2212, -2214, -2215, -2217, -2218, -2220, -2221, + -2222, -2224, -2225, -2226, -2227, -2229, -2230, -2231, + -2232, -2233, -2234, -2236, -2237, -2238, -2239, -2240, + -2241, -2242, -2243, -2244, -2244, -2245, -2246, -2247, + -2248, -2249, -2249, -2250, -2251, -2252, -2252, -2253, 
+ -2254, -2254, -2255, -2255, -2256, -2256, -2257, -2257, + -2258, -2258, -2259, -2259, -2260, -2260, -2260, -2261, + -2261, -2261, -2261, -2262, -2262, -2262, -2262, -2262, + -2263, -2263, -2263, -2263, -2263, -2263, -2263, -2263, + -2263, -2263, -2263, -2263, -2262, -2262, -2262, -2262, + -2262, -2261, -2261, -2261, -2261, -2260, -2260, -2260, + -2259, -2259, -2259, -2258, -2258, -2257, -2257, -2256, + -2256, -2255, -2254, -2254, -2253, -2253, -2252, -2251, + -2251, -2250, -2249, -2248, -2248, -2247, -2246, -2245, + -2244, -2243, -2242, -2242, -2241, -2240, -2239, -2238, + -2237, -2236, -2235, -2233, -2232, -2231, -2230, -2229, + -2228, -2227, -2225, -2224, -2223, -2222, -2220, -2219, + -2218, -2216, -2215, -2214, -2212, -2211, -2209, -2208, + -2206, -2205, -2203, -2202, -2200, -2199, -2197, -2195, + -2194, -2192, -2190, -2189, -2187, -2185, -2184, -2182, + -2180, -2178, -2176, -2174, -2173, -2171, -2169, -2167, + -2165, -2163, -2161, -2159, -2157, -2155, -2153, -2151, + -2149, -2147, -2145, -2142, -2140, -2138, -2136, -2134, + -2132, -2129, -2127, -2125, -2122, -2120, -2118, -2116, + -2113, -2111, -2108, -2106, -2104, -2101, -2099, -2096, + -2094, -2091, -2089, -2086, -2083, -2081, -2078, -2076, + -2073, -2070, -2068, -2065, -2062, -2060, -2057, -2054, + -2051, -2049, -2046, -2043, -2040, -2037, -2034, -2031, + -2029, -2026, -2023, -2020, -2017, -2014, -2011, -2008, + -2005, -2002, -1999, -1996, -1992, -1989, -1986, -1983, + -1980, -1977, -1974, -1970, -1967, -1964, -1961, -1957, + -1954, -1951, -1948, -1944, -1941, -1938, -1934, -1931, + -1927, -1924, -1921, -1917, -1914, -1910, -1907, -1903, + -1900, -1896, -1893, -1889, -1886, -1882, -1878, -1875, + -1871, -1868, -1864, -1860, -1857, -1853, -1849, -1845, + -1842, -1838, -1834, -1830, -1827, -1823, -1819, -1815, + -1811, -1807, -1804, -1800, -1796, -1792, -1788, -1784, + -1780, -1776, -1772, -1768, -1764, -1760, -1756, -1752, + -1748, -1744, -1740, -1736, -1732, -1728, -1724, -1719, + -1715, -1711, -1707, 
-1703, -1699, -1694, -1690, -1686, + -1682, -1677, -1673, -1669, -1665, -1660, -1656, -1652, + -1647, -1643, -1639, -1634, -1630, -1625, -1621, -1617, + -1612, -1608, -1603, -1599, -1594, -1590, -1586, -1581, + -1577, -1572, -1567, -1563, -1558, -1554, -1549, -1545, + -1540, -1535, -1531, -1526, -1522, -1517, -1512, -1508, + -1503, -1498, -1494, -1489, -1484, -1480, -1475, -1470, + -1465, -1461, -1456, -1451, -1446, -1441, -1437, -1432, + -1427, -1422, -1417, -1413, -1408, -1403, -1398, -1393, + -1388, -1383, -1378, -1374, -1369, -1364, -1359, -1354, + -1349, -1344, -1339, -1334, -1329, -1324, -1319, -1314, + -1309, -1304, -1299, -1294, -1289, -1284, -1279, -1274, + -1269, -1264, -1258, -1253, -1248, -1243, -1238, -1233, + -1228, -1223, -1218, -1212, -1207, -1202, -1197, -1192, + -1187, -1181, -1176, -1171, -1166, -1161, -1155, -1150, + -1145, -1140, -1135, -1129, -1124, -1119, -1114, -1108, + -1103, -1098, -1092, -1087, -1082, -1077, -1071, -1066, + -1061, -1055, -1050, -1045, -1039, -1034, -1029, -1023, + -1018, -1013, -1007, -1002, -997, -991, -986, -980, + -975, -970, -964, -959, -953, -948, -943, -937, + -932, -926, -921, -916, -910, -905, -899, -894, + -888, -883, -877, -872, -867, -861, -856, -850, + -845, -839, -834, -828, -823, -817, -812, -806, + -801, -795, -790, -784, -779, -773, -768, -762, + -757, -751, -746, -740, -735, -729, -724, -718, + -713, -707, -702, -696, -691, -685, -680, -674, + -669, -663, -658, -652, -646, -641, -635, -630, + -624, -619, -613, -608, -602, -597, -591, -585, + -580, -574, -569, -563, -558, -552, -547, -541, + -536, -530, -524, -519, -513, -508, -502, -497, + -491, -486, -480, -475, -469, -464, -458, -452, + -447, -441, -436, -430, -425, -419, -414, -408, + -403, -397, -392, -386, -381, -375, -369, -364, + -358, -353, -347, -342, -336, -331, -325, -320, + -314, -309, -303, -298, -292, -287, -281, -276, + -270, -265, -259, -254, -248, -243, -238, -232, + -227, -221, -216, -210, -205, -199, -194, -188, + -183, -178, -172, 
-167, -161, -156, -150, -145, + -140, -134, -129, -123, -118, -113, -107, -102, + -96, -91, -86, -80, -75, -70, -64, -59, + -53, -48, -43, -37, -32, -27, -21, -16, + -11, -5, 0, 5, 10, 16, 21, 26, + 32, 37, 42, 47, 53, 58, 63, 68, + 74, 79, 84, 89, 95, 100, 105, 110, + 115, 121, 126, 131, 136, 141, 146, 152, + 157, 162, 167, 172, 177, 182, 187, 193, + 198, 203, 208, 213, 218, 223, 228, 233, + 238, 243, 248, 253, 258, 263, 268, 273, + 278, 283, 288, 293, 298, 303, 308, 313, + 318, 323, 328, 333, 338, 343, 347, 352, + 357, 362, 367, 372, 377, 382, 386, 391, + 396, 401, 406, 410, 415, 420, 425, 429, + 434, 439, 444, 448, 453, 458, 463, 467, + 472, 477, 481, 486, 491, 495, 500, 505, + 509, 514, 518, 523, 528, 532, 537, 541, + 546, 550, 555, 559, 564, 568, 573, 577, + 582, 586, 591, 595, 600, 604, 608, 613, + 617, 622, 626, 630, 635, 639, 643, 648, + 652, 656, 661, 665, 669, 674, 678, 682, + 686, 691, 695, 699, 703, 707, 712, 716, + 720, 724, 728, 732, 737, 741, 745, 749, + 753, 757, 761, 765, 769, 773, 777, 781, + 785, 789, 793, 797, 801, 805, 809, 813, + 817, 821, 825, 829, 832, 836, 840, 844, + 848, 852, 855, 859, 863, 867, 871, 874, + 878, 882, 885, 889, 893, 897, 900, 904, + 908, 911, 915, 918, 922, 926, 929, 933, + 936, 940, 943, 947, 950, 954, 957, 961, + 964, 968, 971, 975, 978, 981, 985, 988, + 992, 995, 998, 1002, 1005, 1008, 1012, 1015, + 1018, 1021, 1025, 1028, 1031, 1034, 1038, 1041, + 1044, 1047, 1050, 1053, 1057, 1060, 1063, 1066, + 1069, 1072, 1075, 1078, 1081, 1084, 1087, 1090, + 1093, 1096, 1099, 1102, 1105, 1108, 1111, 1114, + 1117, 1119, 1122, 1125, 1128, 1131, 1134, 1136, + 1139, 1142, 1145, 1148, 1150, 1153, 1156, 1158, + 1161, 1164, 1166, 1169, 1172, 1174, 1177, 1179, + 1182, 1185, 1187, 1190, 1192, 1195, 1197, 1200, + 1202, 1205, 1207, 1210, 1212, 1214, 1217, 1219, + 1222, 1224, 1226, 1229, 1231, 1233, 1236, 1238, + 1240, 1242, 1245, 1247, 1249, 1251, 1253, 1256, + 1258, 1260, 1262, 1264, 1266, 1268, 1271, 1273, + 1275, 1277, 1279, 1281, 1283, 
1285, 1287, 1289, + 1291, 1293, 1295, 1297, 1298, 1300, 1302, 1304, + 1306, 1308, 1310, 1311, 1313, 1315, 1317, 1319, + 1320, 1322, 1324, 1325, 1327, 1329, 1330, 1332, + 1334, 1335, 1337, 1339, 1340, 1342, 1343, 1345, + 1346, 1348, 1349, 1351, 1352, 1354, 1355, 1357, + 1358, 1360, 1361, 1362, 1364, 1365, 1367, 1368, + 1369, 1370, 1372, 1373, 1374, 1376, 1377, 1378, + 1379, 1380, 1382, 1383, 1384, 1385, 1386, 1387, + 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, + 1397, 1398, 1399, 1400, 1401, 1402, 1403, 1404, + 1405, 1405, 1406, 1407, 1408, 1409, 1410, 1410, + 1411, 1412, 1413, 1414, 1414, 1415, 1416, 1416, + 1417, 1418, 1419, 1419, 1420, 1420, 1421, 1422, + 1422, 1423, 1423, 1424, 1424, 1425, 1425, 1426, + 1426, 1427, 1427, 1428, 1428, 1429, 1429, 1429, + 1430, 1430, 1431, 1431, 1431, 1432, 1432, 1432, + 1432, 1433, 1433, 1433, 1433, 1434, 1434, 1434, + 1434, 1434, 1434, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1435, 1435, 1435, 1435, 1435, 1435, 1435, + 1435, 1434, 1434, 1434, 1434, 1434, 1434, 1433, + 1433, 1433, 1433, 1432, 1432, 1432, 1432, 1431, + 1431, 1431, 1430, 1430, 1429, 1429, 1429, 1428, + 1428, 1427, 1427, 1427, 1426, 1426, 1425, 1425, + 1424, 1424, 1423, 1422, 1422, 1421, 1421, 1420, + 1420, 1419, 1418, 1418, 1417, 1416, 1416, 1415, + 1414, 1413, 1413, 1412, 1411, 1410, 1410, 1409, + 1408, 1407, 1406, 1406, 1405, 1404, 1403, 1402, + 1401, 1400, 1400, 1399, 1398, 1397, 1396, 1395, + 1394, 1393, 1392, 1391, 1390, 1389, 1388, 1387, + 1386, 1384, 1383, 1382, 1381, 1380, 1379, 1378, + 1377, 1375, 1374, 1373, 1372, 1371, 1369, 1368, + 1367, 1366, 1364, 1363, 1362, 1361, 1359, 1358, + 1357, 1355, 1354, 1353, 1351, 1350, 1348, 1347, + 1346, 1344, 1343, 1341, 1340, 1338, 1337, 1335, + 1334, 1332, 1331, 1329, 1328, 1326, 1325, 1323, + 1322, 1320, 1318, 1317, 1315, 1314, 1312, 1310, + 1309, 1307, 1305, 1304, 1302, 1300, 1299, 1297, + 1295, 1293, 1292, 1290, 1288, 1286, 1284, 1283, + 1281, 1279, 1277, 1275, 1274, 
1272, 1270, 1268, + 1266, 1264, 1262, 1260, 1258, 1257, 1255, 1253, + 1251, 1249, 1247, 1245, 1243, 1241, 1239, 1237, + 1235, 1233, 1231, 1229, 1227, 1224, 1222, 1220, + 1218, 1216, 1214, 1212, 1210, 1208, 1205, 1203, + 1201, 1199, 1197, 1195, 1192, 1190, 1188, 1186, + 1183, 1181, 1179, 1177, 1174, 1172, 1170, 1168, + 1165, 1163, 1161, 1158, 1156, 1154, 1151, 1149, + 1147, 1144, 1142, 1140, 1137, 1135, 1132, 1130, + 1128, 1125, 1123, 1120, 1118, 1115, 1113, 1110, + 1108, 1105, 1103, 1100, 1098, 1095, 1093, 1090, + 1088, 1085, 1083, 1080, 1078, 1075, 1073, 1070, + 1067, 1065, 1062, 1060, 1057, 1054, 1052, 1049, + 1046, 1044, 1041, 1038, 1036, 1033, 1030, 1028, + 1025, 1022, 1020, 1017, 1014, 1012, 1009, 1006, + 1003, 1001, 998, 995, 992, 990, 987, 984, + 981, 978, 976, 973, 970, 967, 964, 961, + 959, 956, 953, 950, 947, 944, 942, 939, + 936, 933, 930, 927, 924, 921, 918, 915, + 913, 910, 907, 904, 901, 898, 895, 892, + 889, 886, 883, 880, 877, 874, 871, 868, + 865, 862, 859, 856, 853, 850, 847, 844, + 841, 838, 835, 832, 829, 826, 823, 820, + 817, 813, 810, 807, 804, 801, 798, 795, + 792, 789, 786, 783, 779, 776, 773, 770, + 767, 764, 761, 758, 754, 751, 748, 745, + 742, 739, 735, 732, 729, 726, 723, 720, + 716, 713, 710, 707, 704, 700, 697, 694, + 691, 688, 684, 681, 678, 675, 671, 668, + 665, 662, 659, 655, 652, 649, 646, 642, + 639, 636, 633, 629, 626, 623, 620, 616, + 613, 610, 606, 603, 600, 597, 593, 590, + 587, 584, 580, 577, 574, 570, 567, 564, + 560, 557, 554, 551, 547, 544, 541, 537, + 534, 531, 527, 524, 521, 517, 514, 511, + 507, 504, 501, 498, 494, 491, 488, 484, + 481, 478, 474, 471, 468, 464, 461, 458, + 454, 451, 448, 444, 441, 437, 434, 431, + 427, 424, 421, 417, 414, 411, 407, 404, + 401, 397, 394, 391, 387, 384, 381, 377, + 374, 371, 367, 364, 361, 357, 354, 351, + 347, 344, 341, 337, 334, 330, 327, 324, + 320, 317, 314, 310, 307, 304, 300, 297, + 294, 290, 287, 284, 280, 277, 274, 270, + 267, 264, 260, 257, 254, 250, 247, 244, + 240, 237, 234, 
230, 227, 224, 221, 217, + 214, 211, 207, 204, 201, 197, 194, 191, + 187, 184, 181, 178, 174, 171, 168, 164, + 161, 158, 154, 151, 148, 145, 141, 138, + 135, 132, 128, 125, 122, 118, 115, 112, + 109, 105, 102, 99, 96, 92, 89, 86, + 83, 79, 76, 73, 70, 67, 63, 60, + 57, 54, 50, 47, 44, 41, 38, 34, + 31, 28, 25, 22, 18, 15, 12, 9, + 6, 2, -1, -4, -7, -10, -13, -16, + -20, -23, -26, -29, -32, -35, -39, -42, + -45, -48, -51, -54, -57, -60, -63, -67, + -70, -73, -76, -79, -82, -85, -88, -91, + -94, -97, -100, -104, -107, -110, -113, -116, + -119, -122, -125, -128, -131, -134, -137, -140, + -143, -146, -149, -152, -155, -158, -161, -164, + -167, -170, -173, -176, -179, -182, -185, -188, + -191, -194, -196, -199, -202, -205, -208, -211, + -214, -217, -220, -223, -226, -228, -231, -234, + -237, -240, -243, -246, -248, -251, -254, -257, + -260, -263, -266, -268, -271, -274, -277, -280, + -282, -285, -288, -291, -293, -296, -299, -302, + -305, -307, -310, -313, -315, -318, -321, -324, + -326, -329, -332, -334, -337, -340, -343, -345, + -348, -351, -353, -356, -359, -361, -364, -366, + -369, -372, -374, -377, -380, -382, -385, -387, + -390, -392, -395, -398, -400, -403, -405, -408, + -410, -413, -415, -418, -420, -423, -425, -428, + -430, -433, -435, -438, -440, -443, -445, -448, + -450, -453, -455, -457, -460, -462, -465, -467, + -469, -472, -474, -477, -479, -481, -484, -486, + -488, -491, -493, -495, -498, -500, -502, -505, + -507, -509, -512, -514, -516, -518, -521, -523, + -525, -527, -530, -532, -534, -536, -538, -541, + -543, -545, -547, -549, -552, -554, -556, -558, + -560, -562, -564, -567, -569, -571, -573, -575, + -577, -579, -581, -583, -585, -587, -589, -592, + -594, -596, -598, -600, -602, -604, -606, -608, + -610, -612, -614, -615, -617, -619, -621, -623, + -625, -627, -629, -631, -633, -635, -637, -639, + -640, -642, -644, -646, -648, -650, -651, -653, + -655, -657, -659, -661, -662, -664, -666, -668, + -669, -671, -673, -675, -676, -678, -680, -682, + -683, 
-685, -687, -688, -690, -692, -693, -695, + -697, -698, -700, -702, -703, -705, -706, -708, + -710, -711, -713, -714, -716, -717, -719, -720, + -722, -724, -725, -727, -728, -730, -731, -733, + -734, -735, -737, -738, -740, -741, -743, -744, + -746, -747, -748, -750, -751, -753, -754, -755, + -757, -758, -759, -761, -762, -763, -765, -766, + -767, -769, -770, -771, -772, -774, -775, -776, + -777, -779, -780, -781, -782, -783, -785, -786, + -787, -788, -789, -791, -792, -793, -794, -795, + -796, -797, -798, -800, -801, -802, -803, -804, + -805, -806, -807, -808, -809, -810, -811, -812, + -813, -814, -815, -816, -817, -818, -819, -820, + -821, -822, -823, -824, -825, -826, -827, -827, + -828, -829, -830, -831, -832, -833, -833, -834, + -835, -836, -837, -838, -838, -839, -840, -841, + -842, -842, -843, -844, -845, -845, -846, -847, + -847, -848, -849, -850, -850, -851, -852, -852, + -853, -854, -854, -855, -855, -856, -857, -857, + -858, -858, -859, -860, -860, -861, -861, -862, + -862, -863, -863, -864, -864, -865, -865, -866, + -866, -867, -867, -868, -868, -869, -869, -870, + -870, -870, -871, -871, -872, -872, -872, -873, + -873, -874, -874, -874, -875, -875, -875, -876, + -876, -876, -876, -877, -877, -877, -878, -878, + -878, -878, -879, -879, -879, -879, -879, -880, + -880, -880, -880, -880, -881, -881, -881, -881, + -881, -881, -882, -882, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -883, -883, + -883, -883, -883, -883, -883, -883, -883, -883, + -883, -883, -883, -883, -882, -882, -882, -882, + -882, -882, -882, -882, -882, -882, -882, -882, + -881, -881, -881, -881, -881, -881, -881, -880, + -880, -880, -880, -880, -879, -879, -879, -879, + -879, -878, -878, -878, -878, -877, -877, -877, + -876, -876, -876, -876, -875, -875, -875, -874, + -874, -874, -873, -873, -873, -872, -872, -871, + -871, -871, -870, -870, -870, -869, -869, -868, + -868, -867, -867, -867, -866, -866, -865, -865, + -864, -864, -863, -863, -862, -862, -861, -861, + -860, 
-860, -859, -859, -858, -857, -857, -856, + -856, -855, -855, -854, -853, -853, -852, -852, + -851, -850, -850, -849, -848, -848, -847, -846, + -846, -845, -844, -844, -843, -842, -842, -841, + -840, -840, -839, -838, -837, -837, -836, -835, + -834, -834, -833, -832, -831, -831, -830, -829, + -828, -827, -827, -826, -825, -824, -823, -822, + -822, -821, -820, -819, -818, -817, -816, -816, + -815, -814, -813, -812, -811, -810, -809, -808, + -808, -807, -806, -805, -804, -803, -802, -801, + -800, -799, -798, -797, -796, -795, -794, -793, + -792, -791, -790, -789, -788, -787, -786, -785, + -784, -783, -782, -781, -780, -779, -778, -777, + -776, -774, -773, -772, -771, -770, -769, -768, + -767, -766, -765, -763, -762, -761, -760, -759, + -758, -757, -755, -754, -753, -752, -751, -750, + -748, -747, -746, -745, -744, -743, -741, -740, + -739, -738, -736, -735, -734, -733, -732, -730, + -729, -728, -727, -725, -724, -723, -722, -720, + -719, -718, -716, -715, -714, -713, -711, -710, + -709, -707, -706, -705, -703, -702, -701, -699, + -698, -697, -695, -694, -693, -691, -690, -689, + -687, -686, -685, -683, -682, -680, -679, -678, + -676, -675, -673, -672, -671, -669, -668, -666, + -665, -664, -662, -661, -659, -658, -656, -655, + -654, -652, -651, -649, -648, -646, -645, -643, + -642, -640, -639, -638, -636, -635, -633, -632, + -630, -629, -627, -626, -624, -623, -621, -620, + -618, -617, -615, -614, -612, -610, -609, -607, + -606, -604, -603, -601, -600, -598, -597, -595, + -594, -592, -590, -589, -587, -586, -584, -583, + -581, -579, -578, -576, -575, -573, -572, -570, + -568, -567, -565, -564, -562, -560, -559, -557, + -556, -554, -552, -551, -549, -547, -546, -544, + -543, -541, -539, -538, -536, -534, -533, -531, + -530, -528, -526, -525, -523, -521, -520, -518, + -516, -515, -513, -511, -510, -508, -506, -505, + -503, -501, -500, -498, -496, -495, -493, -491, + -490, -488, -486, -485, -483, -481, -479, -478, + -476, -474, -473, -471, -469, -468, -466, -464, + -462, 
-461, -459, -457, -456, -454, -452, -450, + -449, -447, -445, -444, -442, -440, -438, -437, + -435, -433, -432, -430, -428, -426, -425, -423, + -421, -419, -418, -416, -414, -413, -411, -409, + -407, -406, -404, -402, -400, -399, -397, -395, + -393, -392, -390, -388, -386, -385, -383, -381, + -379, -378, -376, -374, -372, -371, -369, -367, + -365, -364, -362, -360, -358, -357, -355, -353, + -351, -350, -348, -346, -344, -343, -341, -339, + -337, -336, -334, -332, -330, -328, -327, -325, + -323, -321, -320, -318, -316, -314, -313, -311, + -309, -307, -306, -304, -302, -300, -299, -297, + -295, -293, -291, -290, -288, -286, -284, -283, + -281, -279, -277, -276, -274, -272, -270, -269, + -267, -265, -263, -262, -260, -258, -256, -255, + -253, -251, -249, -247, -246, -244, -242, -240, + -239, -237, -235, -233, -232, -230, -228, -226, + -225, -223, -221, -219, -218, -216, -214, -213, + -211, -209, -207, -206, -204, -202, -200, -199, + -197, -195, -193, -192, -190, -188, -186, -185, + -183, -181, -180, -178, -176, -174, -173, -171, + -169, -167, -166, -164, -162, -161, -159, -157, + -155, -154, -152, -150, -149, -147, -145, -143, + -142, -140, -138, -137, -135, -133, -132, -130, + -128, -126, -125, -123, -121, -120, -118, -116, + -115, -113, -111, -110, -108, -106, -105, -103, + -101, -99, -98, -96, -94, -93, -91, -89, + -88, -86, -84, -83, -81, -80, -78, -76, + -75, -73, -71, -70, -68, -66, -65, -63, + -61, -60, -58, -57, -55, -53, -52, -50, + -48, -47, -45, -43, -42, -40, -39, -37, + -35, -34, -32, -31, -29, -27, -26, -24, + -23, -21, -19, -18, -16, -15, -13, -11, + -10, -8, -7, -5, -4, -2, 0, 1, + 3, 4, 6, 7, 9, 10, 12, 14, + 15, 17, 18, 20, 21, 23, 24, 26, + 27, 29, 30, 32, 34, 35, 37, 38, + 40, 41, 43, 44, 46, 47, 49, 50, + 52, 53, 55, 56, 58, 59, 61, 62, + 64, 65, 66, 68, 69, 71, 72, 74, + 75, 77, 78, 80, 81, 82, 84, 85, + 87, 88, 90, 91, 93, 94, 95, 97, + 98, 100, 101, 102, 104, 105, 107, 108, + 109, 111, 112, 114, 115, 116, 118, 119, + 121, 122, 123, 125, 126, 
127, 129, 130, + 132, 133, 134, 136, 137, 138, 140, 141, + 142, 144, 145, 146, 148, 149, 150, 152, + 153, 154, 155, 157, 158, 159, 161, 162, + 163, 165, 166, 167, 168, 170, 171, 172, + 174, 175, 176, 177, 179, 180, 181, 182, + 184, 185, 186, 187, 189, 190, 191, 192, + 193, 195, 196, 197, 198, 200, 201, 202, + 203, 204, 206, 207, 208, 209, 210, 211, + 213, 214, 215, 216, 217, 218, 220, 221, + 222, 223, 224, 225, 227, 228, 229, 230, + 231, 232, 233, 234, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 247, 248, + 249, 250, 251, 252, 253, 254, 255, 256, + 257, 258, 259, 260, 261, 262, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, + 274, 275, 276, 277, 278, 279, 280, 281, + 282, 283, 284, 285, 286, 287, 288, 288, + 289, 290, 291, 292, 293, 294, 295, 296, + 297, 298, 299, 300, 301, 302, 302, 303, + 304, 305, 306, 307, 308, 309, 310, 311, + 311, 312, 313, 314, 315, 316, 317, 318, + 318, 319, 320, 321, 322, 323, 323, 324, + 325, 326, 327, 328, 328, 329, 330, 331, + 332, 332, 333, 334, 335, 336, 336, 337, + 338, 339, 339, 340, 341, 342, 343, 343, + 344, 345, 346, 346, 347, 348, 349, 349, + 350, 351, 351, 352, 353, 354, 354, 355, + 356, 356, 357, 358, 358, 359, 360, 361, + 361, 362, 363, 363, 364, 365, 365, 366, + 367, 367, 368, 368, 369, 370, 370, 371, + 372, 372, 373, 373, 374, 375, 375, 376, + 376, 377, 378, 378, 379, 379, 380, 381, + 381, 382, 382, 383, 383, 384, 385, 385, + 386, 386, 387, 387, 388, 388, 389, 389, + 390, 391, 391, 392, 392, 393, 393, 394, + 394, 395, 395, 396, 396, 397, 397, 398, + 398, 398, 399, 399, 400, 400, 401, 401, + 402, 402, 403, 403, 403, 404, 404, 405, + 405, 406, 406, 406, 407, 407, 408, 408, + 409, 409, 409, 410, 410, 410, 411, 411, + 412, 412, 412, 413, 413, 413, 414, 414, + 415, 415, 415, 416, 416, 416, 417, 417, + 417, 418, 418, 418, 419, 419, 419, 420, + 420, 420, 420, 421, 421, 421, 422, 422, + 422, 423, 423, 423, 423, 424, 424, 424, + 424, 425, 425, 425, 425, 426, 426, 426, + 426, 427, 427, 427, 427, 428, 428, 428, + 428, 
428, 429, 429, 429, 429, 429, 430, + 430, 430, 430, 430, 431, 431, 431, 431, + 431, 432, 432, 432, 432, 432, 432, 432, + 433, 433, 433, 433, 433, 433, 433, 434, + 434, 434, 434, 434, 434, 434, 434, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 437, + 437, 437, 437, 437, 437, 437, 437, 436, + 436, 436, 436, 436, 436, 436, 436, 436, + 436, 436, 436, 436, 436, 436, 436, 435, + 435, 435, 435, 435, 435, 435, 435, 435, + 435, 434, 434, 434, 434, 434, 434, 434, + 434, 434, 433, 433, 433, 433, 433, 433, + 433, 432, 432, 432, 432, 432, 432, 432, + 431, 431, 431, 431, 431, 431, 430, 430, + 430, 430, 430, 430, 429, 429, 429, 429, + 429, 429, 428, 428, 428, 428, 428, 427, + 427, 427, 427, 427, 426, 426, 426, 426, + 425, 425, 425, 425, 425, 424, 424, 424, + 424, 423, 423, 423, 423, 422, 422, 422, + 422, 421, 421, 421, 421, 420, 420, 420, + 420, 419, 419, 419, 419, 418, 418, 418, + 418, 417, 417, 417, 416, 416, 416, 416, + 415, 415, 415, 414, 414, 414, 414, 413, + 413, 413, 412, 412, 412, 411, 411, 411, + 410, 410, 410, 409, 409, 409, 409, 408, + 408, 408, 407, 407, 407, 406, 406, 406, + 405, 405, 405, 404, 404, 404, 403, 403, + 402, 402, 402, 401, 401, 401, 400, 400, + 400, 399, 399, 399, 398, 398, 397, 397, + 397, 396, 396, 396, 395, 395, 394, 394, + 394, 393, 393, 393, 392, 392, 391, 391, + 391, 390, 390, 389, 389, 389, 388, 388, + 387, 387, 387, 386, 386, 385, 385, 385, + 384, 384, 383, 383, 382, 382, 382, 381, + 381, 380, 380, 380, 379, 379, 378, 378, + 377, 377, 377, 376, 376, 375, 375, 374, + 374, 373, 373, 373, 372, 372, 371, 371, + 370, 370, 370, 369, 369, 368, 368, 367, + 367, 366, 366, 365, 365, 365, 364, 364, + 363, 363, 362, 362, 361, 361, 360, 360, + 359, 359, 359, 358, 358, 357, 357, 356, + 356, 355, 355, 354, 354, 353, 
353, 352, + 352, 351, 351, 350, 350, 349, 349, 348, + 348, 348, 347, 347, 346, 346, 345, 345, + 344, 344, 343, 343, 342, 342, 341, 341, + 340, 340, 339, 339, 338, 338, 337, 337, + 336, 336, 335, 335, 334, 334, 333, 333, + 332, 332, 331, 331, 330, 330, 329, 329, + 328, 328, 327, 327, 326, 326, 325, 325, + 324, 323, 323, 322, 322, 321, 321, 320, + 320, 319, 319, 318, 318, 317, 317, 316, + 316, 315, 315, 314, 314, 313, 313, 312, + 312, 311, 311, 310, 309, 309, 308, 308, + 307, 307, 306, 306, 305, 305, 304, 304, + 303, 303, 302, 302, 301, 300, 300, 299, + 299, 298, 298, 297, 297, 296, 296, 295, + 295, 294, 294, 293, 293, 292, 291, 291, + 290, 290, 289, 289, 288, 288, 287, 287, + 286, 286, 285, 285, 284, 283, 283, 282, + 282, 281, 281, 280, 280, 279, 279, 278, + 278, 277, 277, 276, 275, 275, 274, 274, + 273, 273, 272, 272, 271, 271, 270, 270, + 269, 268, 268, 267, 267, 266, 266, 265, + 265, 264, 264, 263, 263, 262, 261, 261, + 260, 260, 259, 259, 258, 258, 257, 257, + 256, 256, 255, 255, 254, 253, 253, 252, + 252, 251, 251, 250, 250, 249, 249, 248, + 248, 247, 247, 246, 245, 245, 244, 244, + 243, 243, 242, 242, 241, 241, 240, 240, + 239, 239, 238, 237, 237, 236, 236, 235, + 235, 234, 234, 233, 233, 232, 232, 231, + 231, 230, 230, 229, 229, 228, 227, 227, + 226, 226, 225, 225, 224, 224, 223, 223, + 222, 222, 221, 221, 220, 220, 219, 219, + 218, 218, 217, 217, 216, 215, 215, 214, + 214, 213, 213, 212, 212, 211, 211, 210, + 210, 209, 209, 208, 208, 207, 207, 206, + 206, 205, 205, 204, 204, 203, 203, 202, + 202, 201, 201, 200, 200, 199, 199, 198, + 198, 197, 197, 196, 196, 195, 195, 194, + 194, 193, 193, 192, 192, 191, 191, 190, + 190, 189, 189, 188, 188, 187, 187, 186, + 186, 185, 185, 184, 184, 183, 183, 182, + 182, 181, 181, 180, 180, 179, 179, 178, + 178, 177, 177, 176, 176, 175, 175, 174, + 174, 174, 173, 173, 172, 172, 171, 171, + 170, 170, 169, 169, 168, 168, 167, 167, + 166, 166, 165, 165, 165, 164, 164, 163, + 163, 162, 162, 161, 161, 160, 160, 159, + 159, 159, 
158, 158, 157, 157, 156, 156, + 155, 155, 154, 154, 153, 153, 153, 152, + 152, 151, 151, 150, 150, 149, 149, 149, + 148, 148, 147, 147, 146, 146, 145, 145, + 145, 144, 144, 143, 143, 142, 142, 141, + 141, 141, 140, 140, 139, 139, 138, 138, + 138, 137, 137, 136, 136, 135, 135, 135, + 134, 134, 133, 133, 132, 132, 132, 131, + 131, 130, 130, 130, 129, 129, 128, 128, + 127, 127, 127, 126, 126, 125, 125, 125, + 124, 124, 123, 123, 123, 122, 122, 121, + 121, 121, 120, 120, 119, 119, 119, 118, + 118, 117, 117, 117, 116, 116, 115, 115, + 115, 114, 114, 113, 113, 113, 112, 112, + 112, 111, 111, 110, 110, 110, 109, 109, + 108, 108, 108, 107, 107, 107, 106, 106, + 105, 105, 105, 104, 104, 104, 103, 103, + 103, 102, 102, 101, 101, 101, 100, 100, + 100, 99, 99, 99, 98, 98, 97, 97, + 97, 96, 96, 96, 95, 95, 95, 94, + 94, 94, 93, 93, 93, 92, 92, 92, + 91, 91, 90, 90, 90, 89, 89, 89, + 88, 88, 88, 87, 87, 87, 86, 86, + 86, 85, 85, 85, 85, 84, 84, 84, + 83, 83, 83, 82, 82, 82, 81, 81, + 81, 80, 80, 80, 79, 79, 79, 78, + 78, 78, 77, 77, 77, 77, 76, 76, + 76, 75, 75, 75, 74, 74, 74, 74, + 73, 73, 73, 72, 72, 72, 71, 71, + 71, 71, 70, 70, 70, 69, 69, 69, + 69, 68, 68, 68, 67, 67, 67, 67, + 66, 66, 66, 65, 65, 65, 65, 64, + 64, 64, 63, 63, 63, 63, 62, 62, + 62, 62, 61, 61, 61, 60, 60, 60, + 60, 59, 59, 59, 59, 58, 58, 58, + 58, 57, 57, 57, 57, 56, 56, 56, + 56, 55, 55, 55, 55, 54, 54, 54, + 54, 53, 53, 53, 53, 52, 52, 52, + 52, 51, 51, 51, 51, 50, 50, 50, + 50, 50, 49, 49, 49, 49, 48, 48, + 48, 48, 47, 47, 47, 47, 46, 46, + 46, 46, 46, 45, 45, 45, 45, 44, + 44, 44, 44, 44, 43, 43, 43, 43, + 43, 42, 42, 42, 42, 42, 41, 41, + 41, 41, 40, 40, 40, 40, 40, 39, + 39, 39, 39, 39, 38, 38, 38, 38, + 38, 37, 37, 37, 37, 37, 37, 36, + 36, 36, 36, 36, 35, 35, 35, 35, + 35, 34, 34, 34, 34, 34, 33, 33, + 33, 33, 33, 33, 32, 32, 32, 32, + 32, 32, 31, 31, 31, 31, 31, 31, + 30, 30, 30, 30, 30, 29, 29, 29, + 29, 29, 29, 28, 28, 28, 28, 28, + 28, 27, 27, 27, 27, 27, 27, 27, + 26, 26, 26, 26, 26, 26, 26, 
25, + 25, 25, 25, 25, 25, 24, 24, 24, + 24, 24, 24, 23, 23, 23, 23, 23, + 23, 23, 22, 22, 22, 22, 22, 22, + 22, 22, 21, 21, 21, 21, 21, 21, + 21, 21, 20, 20, 20, 20, 20, 20, + 20, 19, 19, 19, 19, 19, 19, 19, + 18, 18, 18, 18, 18, 18, 18, 18, + 18, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 16, 16, 16, 16, 16, + 16, 16, 15, 15, 15, 15, 15, 15, + 15, 15, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, + 13, 13, 13, 13, 13, 13, 13, 13, + 12, 12, 12, 12, 12, 12, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 10, 10, 10, 10, + 9, 9, 9, 8, 8, 8, 7, 7, + 7, 7, 7, 7, 7, 7, 8, 8, + 9, 9, 10, 11, 12, 14, 15, 17, + 18, 20, 21, 23, 25, 26, 28, 29, + 31, 32, 33, 34, 35, 35, 36, 36, +}; + + +void computeCorrectionFactors(float *factors, unsigned nrChannels) +{ + // The following matlab functions are used: + + // f=fftshift(fft(Coeffs16384Kaiser_quant,262144)) + // m=f(131073-128:131073+127) + // r=f(131073-128+256:131073+127+256) + // l=f(131073-128-256:131073+127-256) + // plot(2^50./(abs(m).^2+abs(l).^2+abs(r).^2)) + + unsigned fftSize = STATION_FFT_SIZE * nrChannels; + + // We cannot make the fft smaller than the number of filter constants. 
+ if (fftSize < STATION_FILTER_LENGTH) + fftSize = STATION_FILTER_LENGTH; + + // it is not worth to use the more complex R2C FFTW method + std::vector<std::complex<float> > in(fftSize), out(fftSize); + +#if defined HAVE_FFTW3 + fftwf_plan plan; +#pragma omp critical (FFTW) + plan = fftwf_plan_dft_1d(fftSize, reinterpret_cast<fftwf_complex *>(&in[0]), reinterpret_cast<fftwf_complex *>(&out[0]), FFTW_FORWARD, FFTW_ESTIMATE); +#elif defined HAVE_FFTW2 + fftw_plan plan; +#pragma omp critical (FFTW) + plan = fftw_create_plan(fftSize, FFTW_FORWARD, FFTW_ESTIMATE); +#else +#error need FFTW2 or FFTW3 +#endif + + for (unsigned i = 0; i < STATION_FILTER_LENGTH; i ++) + in[i] = stationFilterConstants[i]; + + for (unsigned i = STATION_FILTER_LENGTH; i < fftSize; i ++) + in[i] = 0; + +#if defined HAVE_FFTW3 + fftwf_execute(plan); +#pragma omp critical (FFTW) + fftwf_destroy_plan(plan); +#elif defined HAVE_FFTW2 + fftw_one(plan, reinterpret_cast<fftw_complex *>(&in[0]), reinterpret_cast<fftw_complex *>(&out[0])); +#pragma omp critical (FFTW) + fftw_destroy_plan(plan); +#endif + + for (unsigned i = 0; i < nrChannels; i ++) { + const std::complex<float> m = out[(i - nrChannels / 2) % fftSize]; + const std::complex<float> l = out[(i - 3 * nrChannels / 2) % fftSize]; + const std::complex<float> r = out[i + nrChannels / 2]; + + factors[i] = pow(2, 25) / sqrt(abs(m * m + l * l + r * r)); + } +} + +} // namespace BandPass + + +#if 0 +int main() +{ + std::vector<float> factors(4096); + BandPass::computeCorrectionFactors(&factors[0], 4096); + return 0; +} +#endif diff --git a/RTCP/GPUProc/src/BandPass.h b/RTCP/GPUProc/src/BandPass.h new file mode 100644 index 0000000000000000000000000000000000000000..acdc9eff1c46882e27cff4ad90f5976651e3800e --- /dev/null +++ b/RTCP/GPUProc/src/BandPass.h @@ -0,0 +1,9 @@ +#ifndef BANDPASS_H +#define BANDPASS_H + + +namespace BandPass { + void computeCorrectionFactors(float *factors, unsigned nrChannels); +} + +#endif diff --git 
#define MAX(A,B) ((A)>(B)?(A):(B))
#define NR_PASSES MAX((NR_STATIONS + 6) / 16, 1) // gives best results on GTX 680
#define NR_STATIONS_PER_PASS ((NR_STATIONS + NR_PASSES - 1) / NR_PASSES)

#if NR_STATIONS_PER_PASS > 32
#error "need more passes to beam form this number of stations"
#endif


typedef __global float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_TABS][NR_POLARIZATIONS];
typedef __global float4 (*BandPassCorrectedType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL];
typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS];


/*
 * Helpers that generate the hand-unrolled per-station code.  K is the
 * zero-based station index within the current pass.  Both helpers use the
 * same guard (first_station + K < NR_STATIONS), so a weight is accumulated
 * exactly when it has been loaded.
 */

/* Declare weight_K and load it when station first_station + K exists. */
#define LOAD_WEIGHT(K)                                              \
    float2 weight_##K;                                              \
                                                                    \
    if (first_station + (K) < NR_STATIONS)                          \
      weight_##K = (*weights)[first_station + (K)][channel][tab];

/* Add weight_K times the locally stored sample of station K to sum
 * (complex multiplication, written out on the float2 components). */
#define ADD_STATION(K)                                              \
    if (first_station + (K) < NR_STATIONS) {                        \
      sample = _local.samples[K][t][pol];                           \
      sum += weight_##K.xx * sample;                                \
      sum += weight_##K.yy * (float2) (-sample.y, sample.x);        \
    }


/*
 * Beam former: for every channel, time sample, tied-array beam (TAB) and
 * polarization, accumulate the weighted sum over all stations of the
 * band-pass corrected samples.
 *
 * Work-item mapping (as used by the code below):
 *   get_local_id(0)  -> polarization
 *   get_local_id(1)  -> TAB
 *   get_global_id(2) -> channel
 *
 * complexVoltagesPtr  output, laid out as ComplexVoltagesType
 * samplesPtr          input,  laid out as BandPassCorrectedType
 * weightsPtr          input,  laid out as WeightsType
 *
 * The stations are processed in passes of NR_STATIONS_PER_PASS stations.
 * The first pass starts the sum at zero; every later pass reads the partial
 * sum back from complexVoltages and adds to it.
 */
__kernel void complexVoltages(__global void *complexVoltagesPtr,
                              __global const void *samplesPtr,
                              __global const void *weightsPtr)
{
  ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr;
  BandPassCorrectedType samples = (BandPassCorrectedType) samplesPtr;
  WeightsType weights = (WeightsType) weightsPtr;

  uint pol = get_local_id(0);
  uint tab = get_local_id(1);
  uint channel = get_global_id(2);

  float2 sample;

  // Samples of 16 consecutive times for all stations of one pass.  The same
  // storage is written as float4 and read back as float2 per polarization
  // (NOTE(review): this overlay presumes NR_POLARIZATIONS == 2 -- confirm).
  __local union {
    float2 samples[NR_STATIONS_PER_PASS][16][NR_POLARIZATIONS];
    float4 samples4[NR_STATIONS_PER_PASS][16];
  } _local;

#pragma unroll
  for (uint first_station = 0; first_station < NR_STATIONS; first_station += NR_STATIONS_PER_PASS) {
    // Fetch the weights of all stations in this pass once; they are reused
    // for every time sample below.
#if NR_STATIONS_PER_PASS >= 1
    LOAD_WEIGHT(0)
#endif
#if NR_STATIONS_PER_PASS >= 2
    LOAD_WEIGHT(1)
#endif
#if NR_STATIONS_PER_PASS >= 3
    LOAD_WEIGHT(2)
#endif
#if NR_STATIONS_PER_PASS >= 4
    LOAD_WEIGHT(3)
#endif
#if NR_STATIONS_PER_PASS >= 5
    LOAD_WEIGHT(4)
#endif
#if NR_STATIONS_PER_PASS >= 6
    LOAD_WEIGHT(5)
#endif
#if NR_STATIONS_PER_PASS >= 7
    LOAD_WEIGHT(6)
#endif
#if NR_STATIONS_PER_PASS >= 8
    LOAD_WEIGHT(7)
#endif
#if NR_STATIONS_PER_PASS >= 9
    LOAD_WEIGHT(8)
#endif
#if NR_STATIONS_PER_PASS >= 10
    LOAD_WEIGHT(9)
#endif
#if NR_STATIONS_PER_PASS >= 11
    LOAD_WEIGHT(10)
#endif
#if NR_STATIONS_PER_PASS >= 12
    LOAD_WEIGHT(11)
#endif
#if NR_STATIONS_PER_PASS >= 13
    LOAD_WEIGHT(12)
#endif
#if NR_STATIONS_PER_PASS >= 14
    LOAD_WEIGHT(13)
#endif
#if NR_STATIONS_PER_PASS >= 15
    LOAD_WEIGHT(14)
#endif
#if NR_STATIONS_PER_PASS >= 16
    LOAD_WEIGHT(15)
#endif
#if NR_STATIONS_PER_PASS >= 17
    LOAD_WEIGHT(16)
#endif
#if NR_STATIONS_PER_PASS >= 18
    LOAD_WEIGHT(17)
#endif
#if NR_STATIONS_PER_PASS >= 19
    LOAD_WEIGHT(18)
#endif
#if NR_STATIONS_PER_PASS >= 20
    LOAD_WEIGHT(19)
#endif
#if NR_STATIONS_PER_PASS >= 21
    LOAD_WEIGHT(20)
#endif
#if NR_STATIONS_PER_PASS >= 22
    LOAD_WEIGHT(21)
#endif
#if NR_STATIONS_PER_PASS >= 23
    LOAD_WEIGHT(22)
#endif
#if NR_STATIONS_PER_PASS >= 24
    LOAD_WEIGHT(23)
#endif
#if NR_STATIONS_PER_PASS >= 25
    LOAD_WEIGHT(24)
#endif
#if NR_STATIONS_PER_PASS >= 26
    LOAD_WEIGHT(25)
#endif
#if NR_STATIONS_PER_PASS >= 27
    LOAD_WEIGHT(26)
#endif
#if NR_STATIONS_PER_PASS >= 28
    LOAD_WEIGHT(27)
#endif
#if NR_STATIONS_PER_PASS >= 29
    LOAD_WEIGHT(28)
#endif
#if NR_STATIONS_PER_PASS >= 30
    LOAD_WEIGHT(29)
#endif
#if NR_STATIONS_PER_PASS >= 31
    LOAD_WEIGHT(30)
#endif
#if NR_STATIONS_PER_PASS >= 32
    LOAD_WEIGHT(31)
#endif

    for (uint time = 0; time < NR_SAMPLES_PER_CHANNEL; time += 16) {
      // Cooperatively copy 16 time samples of every station in this pass
      // into local memory; the work-items of the group stride over the
      // (station, time) pairs.
      for (uint i = pol + NR_POLARIZATIONS * tab; i < NR_STATIONS_PER_PASS * 16; i += NR_TABS * NR_POLARIZATIONS) {
        uint t = i % 16;
        uint s = i / 16;

        // The run-time bounds checks are only needed when the sizes do not
        // divide evenly; otherwise the compile-time part short-circuits.
        if (NR_SAMPLES_PER_CHANNEL % 16 == 0 || time + t < NR_SAMPLES_PER_CHANNEL)
          if (NR_STATIONS % NR_STATIONS_PER_PASS == 0 || first_station + s < NR_STATIONS)
            _local.samples4[s][t] = convert_float4((*samples)[first_station + s][channel][time + t]);
      }

      // All samples must be in local memory before anyone reads them.
      barrier(CLK_LOCAL_MEM_FENCE);

      for (uint t = 0; t < (NR_SAMPLES_PER_CHANNEL % 16 == 0 ? 16 : min(16U, NR_SAMPLES_PER_CHANNEL - time)); t ++) {
        // First pass starts from zero, later passes continue the partial sum.
        float2 sum = first_station == 0 ? 0 : (*complexVoltages)[channel][time + t][tab][pol];

#if NR_STATIONS_PER_PASS >= 1
        ADD_STATION(0)
#endif
#if NR_STATIONS_PER_PASS >= 2
        ADD_STATION(1)
#endif
#if NR_STATIONS_PER_PASS >= 3
        ADD_STATION(2)
#endif
#if NR_STATIONS_PER_PASS >= 4
        ADD_STATION(3)
#endif
#if NR_STATIONS_PER_PASS >= 5
        ADD_STATION(4)
#endif
#if NR_STATIONS_PER_PASS >= 6
        ADD_STATION(5)
#endif
#if NR_STATIONS_PER_PASS >= 7
        ADD_STATION(6)
#endif
#if NR_STATIONS_PER_PASS >= 8
        ADD_STATION(7)
#endif
#if NR_STATIONS_PER_PASS >= 9
        ADD_STATION(8)
#endif
#if NR_STATIONS_PER_PASS >= 10
        ADD_STATION(9)
#endif
#if NR_STATIONS_PER_PASS >= 11
        ADD_STATION(10)
#endif
#if NR_STATIONS_PER_PASS >= 12
        ADD_STATION(11)
#endif
#if NR_STATIONS_PER_PASS >= 13
        ADD_STATION(12)
#endif
#if NR_STATIONS_PER_PASS >= 14
        ADD_STATION(13)
#endif
#if NR_STATIONS_PER_PASS >= 15
        ADD_STATION(14)
#endif
#if NR_STATIONS_PER_PASS >= 16
        ADD_STATION(15)
#endif
#if NR_STATIONS_PER_PASS >= 17
        ADD_STATION(16)
#endif
#if NR_STATIONS_PER_PASS >= 18
        ADD_STATION(17)
#endif
#if NR_STATIONS_PER_PASS >= 19
        ADD_STATION(18)
#endif
#if NR_STATIONS_PER_PASS >= 20
        ADD_STATION(19)
#endif
#if NR_STATIONS_PER_PASS >= 21
        ADD_STATION(20)
#endif
#if NR_STATIONS_PER_PASS >= 22
        ADD_STATION(21)
#endif
#if NR_STATIONS_PER_PASS >= 23
        ADD_STATION(22)
#endif
#if NR_STATIONS_PER_PASS >= 24
        ADD_STATION(23)
#endif
#if NR_STATIONS_PER_PASS >= 25
        ADD_STATION(24)
#endif
#if NR_STATIONS_PER_PASS >= 26
        ADD_STATION(25)
#endif
#if NR_STATIONS_PER_PASS >= 27
        ADD_STATION(26)
#endif
#if NR_STATIONS_PER_PASS >= 28
        ADD_STATION(27)
#endif
#if NR_STATIONS_PER_PASS >= 29
        ADD_STATION(28)
#endif
#if NR_STATIONS_PER_PASS >= 30
        ADD_STATION(29)
#endif
#if NR_STATIONS_PER_PASS >= 31
        ADD_STATION(30)
#endif
#if NR_STATIONS_PER_PASS >= 32
        ADD_STATION(31)
#endif

        (*complexVoltages)[channel][time + t][tab][pol] = sum;
      }

      // Do not overwrite local memory while other work-items still read it.
      barrier(CLK_LOCAL_MEM_FENCE);
    }
  }
}

#undef ADD_STATION
#undef LOAD_WEIGHT
%tid.x; + // inline asm + // inline asm + mov.u32 %r81, %tid.y; + // inline asm + // inline asm + mov.u32 %r82, %envreg5; + // inline asm + // inline asm + mov.u32 %r83, %ntid.z; + // inline asm + // inline asm + mov.u32 %r84, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r85, %tid.z; + // inline asm + add.s32 %r87, %r85, %r82; + mad.lo.s32 %r10, %r84, %r83, %r87; + shl.b32 %r88, %r10, 10; + shl.b32 %r89, %r81, 3; + add.s32 %r90, %r88, %r89; + add.s32 %r91, %r3, %r90; + ld.global.v2.f32 {%f1028, %f1029}, [%r91]; + ld.global.v2.f32 {%f1034, %f1035}, [%r91+2097152]; + ld.global.v2.f32 {%f1040, %f1041}, [%r91+4194304]; + ld.global.v2.f32 {%f1046, %f1047}, [%r91+6291456]; + ld.global.v2.f32 {%f1052, %f1053}, [%r91+8388608]; + ld.global.v2.f32 {%f1058, %f1059}, [%r91+10485760]; + ld.global.v2.f32 {%f1064, %f1065}, [%r91+12582912]; + ld.global.v2.f32 {%f1070, %f1071}, [%r91+14680064]; + ld.global.v2.f32 {%f1076, %f1077}, [%r91+16777216]; + ld.global.v2.f32 {%f1082, %f1083}, [%r91+18874368]; + ld.global.v2.f32 {%f1088, %f1089}, [%r91+20971520]; + ld.global.v2.f32 {%f1094, %f1095}, [%r91+23068672]; + ld.global.v2.f32 {%f1100, %f1101}, [%r91+25165824]; + ld.global.v2.f32 {%f1106, %f1107}, [%r91+27262976]; + ld.global.v2.f32 {%f1112, %f1113}, [%r91+29360128]; + ld.global.v2.f32 {%f1118, %f1119}, [%r91+31457280]; + shl.b32 %r92, %r10, 16; + shl.b32 %r93, %r81, 4; + add.s32 %r94, %r92, %r93; + shl.b32 %r95, %r80, 3; + add.s32 %r96, %r94, %r95; + add.s32 %r189, %r1, %r96; + mov.u32 %r97, shr_4__local; + add.s32 %r98, %r97, %r95; + add.s32 %r12, %r98, 2048; + mov.u32 %r182, 0; + +BB0_1: + mov.u32 %r187, %r189; + mov.u32 %r13, %r187; + shl.b32 %r15, %r182, 4; + // inline asm + mov.u32 %r99, %tid.x; + // inline asm + // inline asm + mov.u32 %r100, %tid.y; + // inline asm + shl.b32 %r17, %r100, 1; + add.s32 %r184, %r17, %r99; + setp.gt.u32 %p1, %r184, 255; + @%p1 bra BB0_4; + + add.s32 %r19, %r99, %r17; + and.b32 %r102, %r19, 15; + add.s32 %r20, %r15, %r102; + mov.u32 %r183, 
0; + +BB0_3: + mov.u32 %r22, %r184; + add.s32 %r103, %r19, %r183; + and.b32 %r104, %r103, 65520; + shl.b32 %r105, %r104, 16; + ld.param.u32 %r179, [complexVoltages_param_1]; + add.s32 %r106, %r179, %r105; + shl.b32 %r107, %r10, 9; + add.s32 %r108, %r106, %r107; + shl.b32 %r109, %r20, 4; + add.s32 %r110, %r108, %r109; + shl.b32 %r111, %r22, 4; + add.s32 %r113, %r97, %r111; + ld.global.v4.f32 {%f1024, %f1025, %f1026, %f1027}, [%r110]; + st.shared.v4.f32 [%r113], {%f1024, %f1025, %f1026, %f1027}; + add.s32 %r23, %r22, 256; + add.s32 %r183, %r183, 256; + setp.gt.u32 %p2, %r22, -257; + mov.u32 %r184, %r23; + @%p2 bra BB0_3; + +BB0_4: + mov.u32 %r186, %r12; + bar.sync 0; + mov.u32 %r185, 16; + mov.u32 %r188, %r13; + +BB0_5: + mov.u32 %r28, %r188; + ld.shared.v2.f32 {%f798, %f799}, [%r186+-2048]; + mov.f32 %f1, 0f00000000; + fma.rn.ftz.f32 %f802, %f1028, %f798, %f1; + fma.rn.ftz.f32 %f803, %f1028, %f799, %f1; + neg.ftz.f32 %f3, %f799; + fma.rn.ftz.f32 %f810, %f1029, %f3, %f802; + fma.rn.ftz.f32 %f811, %f1029, %f798, %f803; + ld.shared.v2.f32 {%f814, %f815}, [%r186+-1792]; + fma.rn.ftz.f32 %f816, %f1034, %f814, %f810; + fma.rn.ftz.f32 %f817, %f1034, %f815, %f811; + neg.ftz.f32 %f6, %f815; + fma.rn.ftz.f32 %f824, %f1035, %f6, %f816; + fma.rn.ftz.f32 %f825, %f1035, %f814, %f817; + ld.shared.v2.f32 {%f828, %f829}, [%r186+-1536]; + fma.rn.ftz.f32 %f830, %f1040, %f828, %f824; + fma.rn.ftz.f32 %f831, %f1040, %f829, %f825; + neg.ftz.f32 %f9, %f829; + fma.rn.ftz.f32 %f838, %f1041, %f9, %f830; + fma.rn.ftz.f32 %f839, %f1041, %f828, %f831; + ld.shared.v2.f32 {%f842, %f843}, [%r186+-1280]; + fma.rn.ftz.f32 %f844, %f1046, %f842, %f838; + fma.rn.ftz.f32 %f845, %f1046, %f843, %f839; + neg.ftz.f32 %f12, %f843; + fma.rn.ftz.f32 %f852, %f1047, %f12, %f844; + fma.rn.ftz.f32 %f853, %f1047, %f842, %f845; + ld.shared.v2.f32 {%f856, %f857}, [%r186+-1024]; + fma.rn.ftz.f32 %f858, %f1052, %f856, %f852; + fma.rn.ftz.f32 %f859, %f1052, %f857, %f853; + neg.ftz.f32 %f15, %f857; + fma.rn.ftz.f32 
%f866, %f1053, %f15, %f858; + fma.rn.ftz.f32 %f867, %f1053, %f856, %f859; + ld.shared.v2.f32 {%f870, %f871}, [%r186+-768]; + fma.rn.ftz.f32 %f872, %f1058, %f870, %f866; + fma.rn.ftz.f32 %f873, %f1058, %f871, %f867; + neg.ftz.f32 %f18, %f871; + fma.rn.ftz.f32 %f880, %f1059, %f18, %f872; + fma.rn.ftz.f32 %f881, %f1059, %f870, %f873; + ld.shared.v2.f32 {%f884, %f885}, [%r186+-512]; + fma.rn.ftz.f32 %f886, %f1064, %f884, %f880; + fma.rn.ftz.f32 %f887, %f1064, %f885, %f881; + neg.ftz.f32 %f21, %f885; + fma.rn.ftz.f32 %f894, %f1065, %f21, %f886; + fma.rn.ftz.f32 %f895, %f1065, %f884, %f887; + ld.shared.v2.f32 {%f898, %f899}, [%r186+-256]; + fma.rn.ftz.f32 %f900, %f1070, %f898, %f894; + fma.rn.ftz.f32 %f901, %f1070, %f899, %f895; + neg.ftz.f32 %f24, %f899; + fma.rn.ftz.f32 %f908, %f1071, %f24, %f900; + fma.rn.ftz.f32 %f909, %f1071, %f898, %f901; + ld.shared.v2.f32 {%f912, %f913}, [%r186]; + fma.rn.ftz.f32 %f914, %f1076, %f912, %f908; + fma.rn.ftz.f32 %f915, %f1076, %f913, %f909; + neg.ftz.f32 %f27, %f913; + fma.rn.ftz.f32 %f922, %f1077, %f27, %f914; + fma.rn.ftz.f32 %f923, %f1077, %f912, %f915; + ld.shared.v2.f32 {%f926, %f927}, [%r186+256]; + fma.rn.ftz.f32 %f928, %f1082, %f926, %f922; + fma.rn.ftz.f32 %f929, %f1082, %f927, %f923; + neg.ftz.f32 %f30, %f927; + fma.rn.ftz.f32 %f936, %f1083, %f30, %f928; + fma.rn.ftz.f32 %f937, %f1083, %f926, %f929; + ld.shared.v2.f32 {%f940, %f941}, [%r186+512]; + fma.rn.ftz.f32 %f942, %f1088, %f940, %f936; + fma.rn.ftz.f32 %f943, %f1088, %f941, %f937; + neg.ftz.f32 %f33, %f941; + fma.rn.ftz.f32 %f950, %f1089, %f33, %f942; + fma.rn.ftz.f32 %f951, %f1089, %f940, %f943; + ld.shared.v2.f32 {%f954, %f955}, [%r186+768]; + fma.rn.ftz.f32 %f956, %f1094, %f954, %f950; + fma.rn.ftz.f32 %f957, %f1094, %f955, %f951; + neg.ftz.f32 %f36, %f955; + fma.rn.ftz.f32 %f964, %f1095, %f36, %f956; + fma.rn.ftz.f32 %f965, %f1095, %f954, %f957; + ld.shared.v2.f32 {%f968, %f969}, [%r186+1024]; + fma.rn.ftz.f32 %f970, %f1100, %f968, %f964; + fma.rn.ftz.f32 %f971, 
%f1100, %f969, %f965; + neg.ftz.f32 %f39, %f969; + fma.rn.ftz.f32 %f978, %f1101, %f39, %f970; + fma.rn.ftz.f32 %f979, %f1101, %f968, %f971; + ld.shared.v2.f32 {%f982, %f983}, [%r186+1280]; + fma.rn.ftz.f32 %f984, %f1106, %f982, %f978; + fma.rn.ftz.f32 %f985, %f1106, %f983, %f979; + neg.ftz.f32 %f42, %f983; + fma.rn.ftz.f32 %f992, %f1107, %f42, %f984; + fma.rn.ftz.f32 %f993, %f1107, %f982, %f985; + ld.shared.v2.f32 {%f996, %f997}, [%r186+1536]; + fma.rn.ftz.f32 %f998, %f1112, %f996, %f992; + fma.rn.ftz.f32 %f999, %f1112, %f997, %f993; + neg.ftz.f32 %f45, %f997; + fma.rn.ftz.f32 %f1006, %f1113, %f45, %f998; + fma.rn.ftz.f32 %f1007, %f1113, %f996, %f999; + ld.shared.v2.f32 {%f1010, %f1011}, [%r186+1792]; + fma.rn.ftz.f32 %f1012, %f1118, %f1010, %f1006; + fma.rn.ftz.f32 %f1013, %f1118, %f1011, %f1007; + neg.ftz.f32 %f48, %f1011; + fma.rn.ftz.f32 %f1020, %f1119, %f48, %f1012; + fma.rn.ftz.f32 %f1021, %f1119, %f1010, %f1013; + st.global.v2.f32 [%r28], {%f1020, %f1021}; + add.s32 %r29, %r28, 2048; + add.s32 %r186, %r186, 16; + add.s32 %r185, %r185, -1; + setp.ne.s32 %p3, %r185, 0; + mov.u32 %r188, %r29; + @%p3 bra BB0_5; + + bar.sync 0; + add.s32 %r182, %r182, 1; + add.s32 %r33, %r13, 32768; + setp.ne.s32 %p4, %r182, 2; + mov.u32 %r189, %r33; + @%p4 bra BB0_1; + + ld.param.u32 %r181, [complexVoltages_param_2]; + add.s32 %r119, %r181, %r90; + ld.global.v2.f32 {%f702, %f703}, [%r119+33554432]; + ld.global.v2.f32 {%f708, %f709}, [%r119+35651584]; + ld.global.v2.f32 {%f714, %f715}, [%r119+37748736]; + ld.global.v2.f32 {%f720, %f721}, [%r119+39845888]; + ld.global.v2.f32 {%f726, %f727}, [%r119+41943040]; + ld.global.v2.f32 {%f732, %f733}, [%r119+44040192]; + ld.global.v2.f32 {%f738, %f739}, [%r119+46137344]; + ld.global.v2.f32 {%f744, %f745}, [%r119+48234496]; + ld.global.v2.f32 {%f750, %f751}, [%r119+50331648]; + ld.global.v2.f32 {%f756, %f757}, [%r119+52428800]; + ld.global.v2.f32 {%f762, %f763}, [%r119+54525952]; + ld.global.v2.f32 {%f768, %f769}, [%r119+56623104]; + 
ld.global.v2.f32 {%f774, %f775}, [%r119+58720256]; + ld.global.v2.f32 {%f780, %f781}, [%r119+60817408]; + ld.global.v2.f32 {%f786, %f787}, [%r119+62914560]; + ld.global.v2.f32 {%f792, %f793}, [%r119+65011712]; + ld.param.u32 %r176, [complexVoltages_param_0]; + add.s32 %r197, %r176, %r96; + mov.u32 %r190, 0; + +BB0_8: + mov.u32 %r195, %r197; + mov.u32 %r36, %r195; + shl.b32 %r38, %r190, 4; + // inline asm + mov.u32 %r129, %tid.x; + // inline asm + // inline asm + mov.u32 %r130, %tid.y; + // inline asm + shl.b32 %r40, %r130, 1; + add.s32 %r192, %r40, %r129; + setp.gt.u32 %p5, %r192, 255; + @%p5 bra BB0_11; + + add.s32 %r42, %r129, %r40; + and.b32 %r132, %r42, 15; + add.s32 %r43, %r38, %r132; + mov.u32 %r191, 0; + +BB0_10: + mov.u32 %r45, %r192; + add.s32 %r133, %r42, %r191; + shl.b32 %r134, %r43, 4; + shl.b32 %r135, %r10, 9; + shl.b32 %r136, %r45, 4; + add.s32 %r138, %r97, %r136; + and.b32 %r139, %r133, 65520; + shl.b32 %r140, %r139, 16; + ld.param.u32 %r178, [complexVoltages_param_1]; + add.s32 %r141, %r140, %r178; + add.s32 %r142, %r141, %r135; + add.s32 %r143, %r142, %r134; + ld.global.v4.f32 {%f698, %f699, %f700, %f701}, [%r143+16777216]; + st.shared.v4.f32 [%r138], {%f698, %f699, %f700, %f701}; + add.s32 %r46, %r45, 256; + add.s32 %r191, %r191, 256; + setp.gt.u32 %p6, %r45, -257; + mov.u32 %r192, %r46; + @%p6 bra BB0_10; + +BB0_11: + mov.u32 %r194, %r12; + bar.sync 0; + mov.u32 %r193, 16; + mov.u32 %r196, %r36; + +BB0_12: + mov.u32 %r51, %r196; + ld.shared.v2.f32 {%f472, %f473}, [%r194+-2048]; + ld.global.v2.f32 {%f474, %f475}, [%r51]; + fma.rn.ftz.f32 %f476, %f702, %f472, %f474; + fma.rn.ftz.f32 %f477, %f702, %f473, %f475; + neg.ftz.f32 %f51, %f473; + fma.rn.ftz.f32 %f484, %f703, %f51, %f476; + fma.rn.ftz.f32 %f485, %f703, %f472, %f477; + ld.shared.v2.f32 {%f488, %f489}, [%r194+-1792]; + fma.rn.ftz.f32 %f490, %f708, %f488, %f484; + fma.rn.ftz.f32 %f491, %f708, %f489, %f485; + neg.ftz.f32 %f54, %f489; + fma.rn.ftz.f32 %f498, %f709, %f54, %f490; + fma.rn.ftz.f32 
%f499, %f709, %f488, %f491; + ld.shared.v2.f32 {%f502, %f503}, [%r194+-1536]; + fma.rn.ftz.f32 %f504, %f714, %f502, %f498; + fma.rn.ftz.f32 %f505, %f714, %f503, %f499; + neg.ftz.f32 %f57, %f503; + fma.rn.ftz.f32 %f512, %f715, %f57, %f504; + fma.rn.ftz.f32 %f513, %f715, %f502, %f505; + ld.shared.v2.f32 {%f516, %f517}, [%r194+-1280]; + fma.rn.ftz.f32 %f518, %f720, %f516, %f512; + fma.rn.ftz.f32 %f519, %f720, %f517, %f513; + neg.ftz.f32 %f60, %f517; + fma.rn.ftz.f32 %f526, %f721, %f60, %f518; + fma.rn.ftz.f32 %f527, %f721, %f516, %f519; + ld.shared.v2.f32 {%f530, %f531}, [%r194+-1024]; + fma.rn.ftz.f32 %f532, %f726, %f530, %f526; + fma.rn.ftz.f32 %f533, %f726, %f531, %f527; + neg.ftz.f32 %f63, %f531; + fma.rn.ftz.f32 %f540, %f727, %f63, %f532; + fma.rn.ftz.f32 %f541, %f727, %f530, %f533; + ld.shared.v2.f32 {%f544, %f545}, [%r194+-768]; + fma.rn.ftz.f32 %f546, %f732, %f544, %f540; + fma.rn.ftz.f32 %f547, %f732, %f545, %f541; + neg.ftz.f32 %f66, %f545; + fma.rn.ftz.f32 %f554, %f733, %f66, %f546; + fma.rn.ftz.f32 %f555, %f733, %f544, %f547; + ld.shared.v2.f32 {%f558, %f559}, [%r194+-512]; + fma.rn.ftz.f32 %f560, %f738, %f558, %f554; + fma.rn.ftz.f32 %f561, %f738, %f559, %f555; + neg.ftz.f32 %f69, %f559; + fma.rn.ftz.f32 %f568, %f739, %f69, %f560; + fma.rn.ftz.f32 %f569, %f739, %f558, %f561; + ld.shared.v2.f32 {%f572, %f573}, [%r194+-256]; + fma.rn.ftz.f32 %f574, %f744, %f572, %f568; + fma.rn.ftz.f32 %f575, %f744, %f573, %f569; + neg.ftz.f32 %f72, %f573; + fma.rn.ftz.f32 %f582, %f745, %f72, %f574; + fma.rn.ftz.f32 %f583, %f745, %f572, %f575; + ld.shared.v2.f32 {%f586, %f587}, [%r194]; + fma.rn.ftz.f32 %f588, %f750, %f586, %f582; + fma.rn.ftz.f32 %f589, %f750, %f587, %f583; + neg.ftz.f32 %f75, %f587; + fma.rn.ftz.f32 %f596, %f751, %f75, %f588; + fma.rn.ftz.f32 %f597, %f751, %f586, %f589; + ld.shared.v2.f32 {%f600, %f601}, [%r194+256]; + fma.rn.ftz.f32 %f602, %f756, %f600, %f596; + fma.rn.ftz.f32 %f603, %f756, %f601, %f597; + neg.ftz.f32 %f78, %f601; + fma.rn.ftz.f32 %f610, 
%f757, %f78, %f602; + fma.rn.ftz.f32 %f611, %f757, %f600, %f603; + ld.shared.v2.f32 {%f614, %f615}, [%r194+512]; + fma.rn.ftz.f32 %f616, %f762, %f614, %f610; + fma.rn.ftz.f32 %f617, %f762, %f615, %f611; + neg.ftz.f32 %f81, %f615; + fma.rn.ftz.f32 %f624, %f763, %f81, %f616; + fma.rn.ftz.f32 %f625, %f763, %f614, %f617; + ld.shared.v2.f32 {%f628, %f629}, [%r194+768]; + fma.rn.ftz.f32 %f630, %f768, %f628, %f624; + fma.rn.ftz.f32 %f631, %f768, %f629, %f625; + neg.ftz.f32 %f84, %f629; + fma.rn.ftz.f32 %f638, %f769, %f84, %f630; + fma.rn.ftz.f32 %f639, %f769, %f628, %f631; + ld.shared.v2.f32 {%f642, %f643}, [%r194+1024]; + fma.rn.ftz.f32 %f644, %f774, %f642, %f638; + fma.rn.ftz.f32 %f645, %f774, %f643, %f639; + neg.ftz.f32 %f87, %f643; + fma.rn.ftz.f32 %f652, %f775, %f87, %f644; + fma.rn.ftz.f32 %f653, %f775, %f642, %f645; + ld.shared.v2.f32 {%f656, %f657}, [%r194+1280]; + fma.rn.ftz.f32 %f658, %f780, %f656, %f652; + fma.rn.ftz.f32 %f659, %f780, %f657, %f653; + neg.ftz.f32 %f90, %f657; + fma.rn.ftz.f32 %f666, %f781, %f90, %f658; + fma.rn.ftz.f32 %f667, %f781, %f656, %f659; + ld.shared.v2.f32 {%f670, %f671}, [%r194+1536]; + fma.rn.ftz.f32 %f672, %f786, %f670, %f666; + fma.rn.ftz.f32 %f673, %f786, %f671, %f667; + neg.ftz.f32 %f93, %f671; + fma.rn.ftz.f32 %f680, %f787, %f93, %f672; + fma.rn.ftz.f32 %f681, %f787, %f670, %f673; + ld.shared.v2.f32 {%f684, %f685}, [%r194+1792]; + fma.rn.ftz.f32 %f686, %f792, %f684, %f680; + fma.rn.ftz.f32 %f687, %f792, %f685, %f681; + neg.ftz.f32 %f96, %f685; + fma.rn.ftz.f32 %f694, %f793, %f96, %f686; + fma.rn.ftz.f32 %f695, %f793, %f684, %f687; + st.global.v2.f32 [%r51], {%f694, %f695}; + add.s32 %r52, %r51, 2048; + add.s32 %r194, %r194, 16; + add.s32 %r193, %r193, -1; + setp.ne.s32 %p7, %r193, 0; + mov.u32 %r196, %r52; + @%p7 bra BB0_12; + + bar.sync 0; + add.s32 %r190, %r190, 1; + add.s32 %r56, %r36, 32768; + setp.ne.s32 %p8, %r190, 2; + mov.u32 %r197, %r56; + @%p8 bra BB0_8; + + ld.param.u32 %r180, [complexVoltages_param_2]; + add.s32 
%r149, %r180, %r90; + ld.global.v2.f32 {%f376, %f377}, [%r149+67108864]; + ld.global.v2.f32 {%f382, %f383}, [%r149+69206016]; + ld.global.v2.f32 {%f388, %f389}, [%r149+71303168]; + ld.global.v2.f32 {%f394, %f395}, [%r149+73400320]; + ld.global.v2.f32 {%f400, %f401}, [%r149+75497472]; + ld.global.v2.f32 {%f406, %f407}, [%r149+77594624]; + ld.global.v2.f32 {%f412, %f413}, [%r149+79691776]; + ld.global.v2.f32 {%f418, %f419}, [%r149+81788928]; + ld.global.v2.f32 {%f424, %f425}, [%r149+83886080]; + ld.global.v2.f32 {%f430, %f431}, [%r149+85983232]; + ld.global.v2.f32 {%f436, %f437}, [%r149+88080384]; + ld.global.v2.f32 {%f442, %f443}, [%r149+90177536]; + ld.global.v2.f32 {%f448, %f449}, [%r149+92274688]; + ld.global.v2.f32 {%f454, %f455}, [%r149+94371840]; + ld.global.v2.f32 {%f460, %f461}, [%r149+96468992]; + ld.global.v2.f32 {%f466, %f467}, [%r149+98566144]; + ld.param.u32 %r175, [complexVoltages_param_0]; + add.s32 %r205, %r175, %r96; + mov.u32 %r198, 0; + +BB0_15: + mov.u32 %r203, %r205; + mov.u32 %r59, %r203; + shl.b32 %r61, %r198, 4; + // inline asm + mov.u32 %r159, %tid.x; + // inline asm + // inline asm + mov.u32 %r160, %tid.y; + // inline asm + shl.b32 %r63, %r160, 1; + add.s32 %r200, %r63, %r159; + setp.gt.u32 %p9, %r200, 255; + @%p9 bra BB0_18; + + add.s32 %r65, %r159, %r63; + and.b32 %r162, %r65, 15; + add.s32 %r66, %r61, %r162; + mov.u32 %r199, 0; + +BB0_17: + mov.u32 %r68, %r200; + add.s32 %r163, %r65, %r199; + shl.b32 %r164, %r66, 4; + shl.b32 %r165, %r10, 9; + shl.b32 %r166, %r68, 4; + add.s32 %r168, %r97, %r166; + and.b32 %r169, %r163, 65520; + shl.b32 %r170, %r169, 16; + ld.param.u32 %r177, [complexVoltages_param_1]; + add.s32 %r171, %r170, %r177; + add.s32 %r172, %r171, %r165; + add.s32 %r173, %r172, %r164; + ld.global.v4.f32 {%f372, %f373, %f374, %f375}, [%r173+33554432]; + st.shared.v4.f32 [%r168], {%f372, %f373, %f374, %f375}; + add.s32 %r69, %r68, 256; + add.s32 %r199, %r199, 256; + setp.gt.u32 %p10, %r68, -257; + mov.u32 %r200, %r69; + @%p10 bra 
BB0_17; + +BB0_18: + mov.u32 %r202, %r12; + bar.sync 0; + mov.u32 %r201, 16; + mov.u32 %r204, %r59; + +BB0_19: + mov.u32 %r74, %r204; + ld.shared.v2.f32 {%f146, %f147}, [%r202+-2048]; + ld.global.v2.f32 {%f148, %f149}, [%r74]; + fma.rn.ftz.f32 %f150, %f376, %f146, %f148; + fma.rn.ftz.f32 %f151, %f376, %f147, %f149; + neg.ftz.f32 %f99, %f147; + fma.rn.ftz.f32 %f158, %f377, %f99, %f150; + fma.rn.ftz.f32 %f159, %f377, %f146, %f151; + ld.shared.v2.f32 {%f162, %f163}, [%r202+-1792]; + fma.rn.ftz.f32 %f164, %f382, %f162, %f158; + fma.rn.ftz.f32 %f165, %f382, %f163, %f159; + neg.ftz.f32 %f102, %f163; + fma.rn.ftz.f32 %f172, %f383, %f102, %f164; + fma.rn.ftz.f32 %f173, %f383, %f162, %f165; + ld.shared.v2.f32 {%f176, %f177}, [%r202+-1536]; + fma.rn.ftz.f32 %f178, %f388, %f176, %f172; + fma.rn.ftz.f32 %f179, %f388, %f177, %f173; + neg.ftz.f32 %f105, %f177; + fma.rn.ftz.f32 %f186, %f389, %f105, %f178; + fma.rn.ftz.f32 %f187, %f389, %f176, %f179; + ld.shared.v2.f32 {%f190, %f191}, [%r202+-1280]; + fma.rn.ftz.f32 %f192, %f394, %f190, %f186; + fma.rn.ftz.f32 %f193, %f394, %f191, %f187; + neg.ftz.f32 %f108, %f191; + fma.rn.ftz.f32 %f200, %f395, %f108, %f192; + fma.rn.ftz.f32 %f201, %f395, %f190, %f193; + ld.shared.v2.f32 {%f204, %f205}, [%r202+-1024]; + fma.rn.ftz.f32 %f206, %f400, %f204, %f200; + fma.rn.ftz.f32 %f207, %f400, %f205, %f201; + neg.ftz.f32 %f111, %f205; + fma.rn.ftz.f32 %f214, %f401, %f111, %f206; + fma.rn.ftz.f32 %f215, %f401, %f204, %f207; + ld.shared.v2.f32 {%f218, %f219}, [%r202+-768]; + fma.rn.ftz.f32 %f220, %f406, %f218, %f214; + fma.rn.ftz.f32 %f221, %f406, %f219, %f215; + neg.ftz.f32 %f114, %f219; + fma.rn.ftz.f32 %f228, %f407, %f114, %f220; + fma.rn.ftz.f32 %f229, %f407, %f218, %f221; + ld.shared.v2.f32 {%f232, %f233}, [%r202+-512]; + fma.rn.ftz.f32 %f234, %f412, %f232, %f228; + fma.rn.ftz.f32 %f235, %f412, %f233, %f229; + neg.ftz.f32 %f117, %f233; + fma.rn.ftz.f32 %f242, %f413, %f117, %f234; + fma.rn.ftz.f32 %f243, %f413, %f232, %f235; + ld.shared.v2.f32 
{%f246, %f247}, [%r202+-256]; + fma.rn.ftz.f32 %f248, %f418, %f246, %f242; + fma.rn.ftz.f32 %f249, %f418, %f247, %f243; + neg.ftz.f32 %f120, %f247; + fma.rn.ftz.f32 %f256, %f419, %f120, %f248; + fma.rn.ftz.f32 %f257, %f419, %f246, %f249; + ld.shared.v2.f32 {%f260, %f261}, [%r202]; + fma.rn.ftz.f32 %f262, %f424, %f260, %f256; + fma.rn.ftz.f32 %f263, %f424, %f261, %f257; + neg.ftz.f32 %f123, %f261; + fma.rn.ftz.f32 %f270, %f425, %f123, %f262; + fma.rn.ftz.f32 %f271, %f425, %f260, %f263; + ld.shared.v2.f32 {%f274, %f275}, [%r202+256]; + fma.rn.ftz.f32 %f276, %f430, %f274, %f270; + fma.rn.ftz.f32 %f277, %f430, %f275, %f271; + neg.ftz.f32 %f126, %f275; + fma.rn.ftz.f32 %f284, %f431, %f126, %f276; + fma.rn.ftz.f32 %f285, %f431, %f274, %f277; + ld.shared.v2.f32 {%f288, %f289}, [%r202+512]; + fma.rn.ftz.f32 %f290, %f436, %f288, %f284; + fma.rn.ftz.f32 %f291, %f436, %f289, %f285; + neg.ftz.f32 %f129, %f289; + fma.rn.ftz.f32 %f298, %f437, %f129, %f290; + fma.rn.ftz.f32 %f299, %f437, %f288, %f291; + ld.shared.v2.f32 {%f302, %f303}, [%r202+768]; + fma.rn.ftz.f32 %f304, %f442, %f302, %f298; + fma.rn.ftz.f32 %f305, %f442, %f303, %f299; + neg.ftz.f32 %f132, %f303; + fma.rn.ftz.f32 %f312, %f443, %f132, %f304; + fma.rn.ftz.f32 %f313, %f443, %f302, %f305; + ld.shared.v2.f32 {%f316, %f317}, [%r202+1024]; + fma.rn.ftz.f32 %f318, %f448, %f316, %f312; + fma.rn.ftz.f32 %f319, %f448, %f317, %f313; + neg.ftz.f32 %f135, %f317; + fma.rn.ftz.f32 %f326, %f449, %f135, %f318; + fma.rn.ftz.f32 %f327, %f449, %f316, %f319; + ld.shared.v2.f32 {%f330, %f331}, [%r202+1280]; + fma.rn.ftz.f32 %f332, %f454, %f330, %f326; + fma.rn.ftz.f32 %f333, %f454, %f331, %f327; + neg.ftz.f32 %f138, %f331; + fma.rn.ftz.f32 %f340, %f455, %f138, %f332; + fma.rn.ftz.f32 %f341, %f455, %f330, %f333; + ld.shared.v2.f32 {%f344, %f345}, [%r202+1536]; + fma.rn.ftz.f32 %f346, %f460, %f344, %f340; + fma.rn.ftz.f32 %f347, %f460, %f345, %f341; + neg.ftz.f32 %f141, %f345; + fma.rn.ftz.f32 %f354, %f461, %f141, %f346; + 
fma.rn.ftz.f32 %f355, %f461, %f344, %f347; + ld.shared.v2.f32 {%f358, %f359}, [%r202+1792]; + fma.rn.ftz.f32 %f360, %f466, %f358, %f354; + fma.rn.ftz.f32 %f361, %f466, %f359, %f355; + neg.ftz.f32 %f144, %f359; + fma.rn.ftz.f32 %f368, %f467, %f144, %f360; + fma.rn.ftz.f32 %f369, %f467, %f358, %f361; + st.global.v2.f32 [%r74], {%f368, %f369}; + add.s32 %r75, %r74, 2048; + add.s32 %r202, %r202, 16; + add.s32 %r201, %r201, -1; + setp.ne.s32 %p11, %r201, 0; + mov.u32 %r204, %r75; + @%p11 bra BB0_19; + + bar.sync 0; + add.s32 %r198, %r198, 1; + add.s32 %r79, %r59, 32768; + setp.ne.s32 %p12, %r198, 2; + mov.u32 %r205, %r79; + @%p12 bra BB0_15; + + ret; +} + + diff --git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.4x3 b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.4x3 new file mode 100644 index 0000000000000000000000000000000000000000..dd4345c6780f1fe950027653833d328899aa4ae8 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.4x3 @@ -0,0 +1,118 @@ +#define NR_CHANNELS_PER_BLOCK 4 + + +typedef __global float4 (*ComplexVoltagesType)[NR_TABS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS]; +//typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_INTEGRATION]; +typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS]; + +float2 cmul(float2 a, float2 b) +{ + return (float2) { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x }; +} + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *correctedDataPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint first_tab = 3 * get_global_id(0); + uint channel = get_global_id(1); + uint first_station = 4 * get_global_id(2); + + float2 weight_0_0 = 
(*weights)[first_station + 0][channel][first_tab + 0]; + float2 weight_0_1 = (*weights)[first_station + 0][channel][first_tab + 1]; + float2 weight_0_2 = (*weights)[first_station + 0][channel][first_tab + 2]; + float2 weight_1_0 = (*weights)[first_station + 1][channel][first_tab + 0]; + float2 weight_1_1 = (*weights)[first_station + 1][channel][first_tab + 1]; + float2 weight_1_2 = (*weights)[first_station + 1][channel][first_tab + 2]; + float2 weight_2_0 = (*weights)[first_station + 2][channel][first_tab + 0]; + float2 weight_2_1 = (*weights)[first_station + 2][channel][first_tab + 1]; + float2 weight_2_2 = (*weights)[first_station + 2][channel][first_tab + 2]; + float2 weight_3_0 = (*weights)[first_station + 3][channel][first_tab + 0]; + float2 weight_3_1 = (*weights)[first_station + 3][channel][first_tab + 1]; + float2 weight_3_2 = (*weights)[first_station + 3][channel][first_tab + 2]; + + __local float4 local_sums[3][NR_STATIONS / 4][NR_CHANNELS_PER_BLOCK][NR_TABS / 3]; /* partial sums of the 3 TABs, written by one station group and read by the next */ + + for (int time = 0 - first_station / 4; time < NR_SAMPLES_PER_INTEGRATION + NR_STATIONS - first_station / 4; time ++) { /* time starts first_station / 4 steps early; presumably this skew makes the partial sum read below belong to the same time sample - TODO confirm */ + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum_0, sum_1, sum_2; + + if (first_station == 0) { + sum_0 = sum_1 = sum_2 = (float4) { 0, 0, 0, 0 }; + } else { + sum_0 = local_sums[0][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + sum_1 = local_sums[1][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + sum_2 = local_sums[2][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + } + + float4 sample_0 = (*correctedData)[first_station + 0][time][channel]; /* NOTE(review): the loop bounds let time be < 0 or >= NR_SAMPLES_PER_INTEGRATION and these loads are unguarded - confirm the input buffer is padded */ + float4 sample_1 = (*correctedData)[first_station + 1][time][channel]; + float4 sample_2 = (*correctedData)[first_station + 2][time][channel]; + float4 sample_3 = (*correctedData)[first_station + 3][time][channel]; + + sum_0 += weight_0_0.xxxx * sample_0; + sum_1 += weight_0_1.xxxx * sample_0; + sum_2 += weight_0_2.xxxx * sample_0; + sum_0 += weight_0_0.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, 
sample_0.z }; /* imaginary part of the complex multiply; every TAB uses its own weight (weight_<station>_<tab>), same as the .xxxx terms */ + sum_1 += weight_0_1.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; + sum_2 += weight_0_2.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; + + sum_0 += weight_1_0.xxxx * sample_1; + sum_1 += weight_1_1.xxxx * sample_1; + sum_2 += weight_1_2.xxxx * sample_1; + sum_0 += weight_1_0.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + sum_1 += weight_1_1.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + sum_2 += weight_1_2.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + + sum_0 += weight_2_0.xxxx * sample_2; + sum_1 += weight_2_1.xxxx * sample_2; + sum_2 += weight_2_2.xxxx * sample_2; + sum_0 += weight_2_0.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + sum_1 += weight_2_1.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + sum_2 += weight_2_2.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + + sum_0 += weight_3_0.xxxx * sample_3; + sum_1 += weight_3_1.xxxx * sample_3; + sum_2 += weight_3_2.xxxx * sample_3; + sum_0 += weight_3_0.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + sum_1 += weight_3_1.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + sum_2 += weight_3_2.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (first_station + 4 < NR_STATIONS) { + local_sums[0][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_0; + local_sums[1][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_1; + local_sums[2][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_2; + } else if (time >= 0 && time < NR_SAMPLES_PER_INTEGRATION) { /* last station group stores the result; the loop overruns the time range, so bound the index on both sides */ + (*complexVoltages)[first_tab + 0][time][channel] = sum_0; + (*complexVoltages)[first_tab + 1][time][channel] = sum_1; + (*complexVoltages)[first_tab + 2][time][channel] = sum_2; + } + +#if 0 + local_complex_voltages[tab][local_time_or_channel] = sum; + + 
barrier(CLK_LOCAL_MEM_FENCE); + + (*complexVoltages)[transposed_tab][start_time + local_time][transposed_channel] = local_complex_voltages[transposed_tab][transposed_channel]; +#elif defined STOKES_I || defined STOKES_IQUV + float powerX = sum.x * sum.x + sum.y * sum.y; + float powerY = sum.z * sum.z + sum.w * sum.w; + + (*stokes)[tab][0][0][time_or_channel] = powerX + powerY; +#if defined STOKES_IQUV + (*stokes)[tab][1][0][time_or_channel] = powerX - powerY; + (*stokes)[tab][2][0][time_or_channel] = 2 * (sum.x * sum.z + sum.y * sum.w); + (*stokes)[tab][3][0][time_or_channel] = 2 * (sum.y * sum.z - sum.x * sum.w); +#endif +#endif + } +} diff --git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.6x3 b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.6x3 new file mode 100644 index 0000000000000000000000000000000000000000..ab91e51706981c80d64181d152b298f4a83755e7 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.6x3 @@ -0,0 +1,140 @@ +#define NR_CHANNELS_PER_BLOCK 8 + + +typedef __global float4 (*ComplexVoltagesType)[NR_TABS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS]; +//typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_INTEGRATION]; +typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS]; + +float2 cmul(float2 a, float2 b) +{ + return (float2) { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x }; +} + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *correctedDataPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint first_tab = 3 * get_global_id(0); + uint channel = get_global_id(1); + uint first_station = 6 * get_global_id(2); + + float2 weight_0_0 = (*weights)[first_station + 
0][channel][first_tab + 0]; + float2 weight_0_1 = (*weights)[first_station + 0][channel][first_tab + 1]; + float2 weight_0_2 = (*weights)[first_station + 0][channel][first_tab + 2]; + float2 weight_1_0 = (*weights)[first_station + 1][channel][first_tab + 0]; + float2 weight_1_1 = (*weights)[first_station + 1][channel][first_tab + 1]; + float2 weight_1_2 = (*weights)[first_station + 1][channel][first_tab + 2]; + float2 weight_2_0 = (*weights)[first_station + 2][channel][first_tab + 0]; + float2 weight_2_1 = (*weights)[first_station + 2][channel][first_tab + 1]; + float2 weight_2_2 = (*weights)[first_station + 2][channel][first_tab + 2]; + float2 weight_3_0 = (*weights)[first_station + 3][channel][first_tab + 0]; + float2 weight_3_1 = (*weights)[first_station + 3][channel][first_tab + 1]; + float2 weight_3_2 = (*weights)[first_station + 3][channel][first_tab + 2]; + float2 weight_4_0 = (*weights)[first_station + 4][channel][first_tab + 0]; + float2 weight_4_1 = (*weights)[first_station + 4][channel][first_tab + 1]; + float2 weight_4_2 = (*weights)[first_station + 4][channel][first_tab + 2]; + float2 weight_5_0 = (*weights)[first_station + 5][channel][first_tab + 0]; + float2 weight_5_1 = (*weights)[first_station + 5][channel][first_tab + 1]; + float2 weight_5_2 = (*weights)[first_station + 5][channel][first_tab + 2]; + + __local float4 local_sums[3][NR_STATIONS / 6][NR_CHANNELS_PER_BLOCK][NR_TABS / 3]; /* partial sums of the 3 TABs, written by one station group and read by the next */ + + for (int time = 0 - first_station / 6; time < NR_SAMPLES_PER_INTEGRATION + NR_STATIONS - first_station / 6; time ++) { /* time starts first_station / 6 steps early; presumably this skew makes the partial sum read below belong to the same time sample - TODO confirm */ + barrier(CLK_LOCAL_MEM_FENCE); + + float4 sum_0, sum_1, sum_2; + + if (first_station == 0) { + sum_0 = sum_1 = sum_2 = (float4) { 0, 0, 0, 0 }; + } else { + sum_0 = local_sums[0][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + sum_1 = local_sums[1][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + sum_2 = local_sums[2][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)]; + } + + float4 sample_0 = (*correctedData)[first_station 
+ 0][time][channel]; /* NOTE(review): the loop bounds let time be < 0 or >= NR_SAMPLES_PER_INTEGRATION and these loads are unguarded - confirm the input buffer is padded */ + float4 sample_1 = (*correctedData)[first_station + 1][time][channel]; + float4 sample_2 = (*correctedData)[first_station + 2][time][channel]; + float4 sample_3 = (*correctedData)[first_station + 3][time][channel]; + float4 sample_4 = (*correctedData)[first_station + 4][time][channel]; + float4 sample_5 = (*correctedData)[first_station + 5][time][channel]; + + sum_0 += weight_0_0.xxxx * sample_0; + sum_1 += weight_0_1.xxxx * sample_0; + sum_2 += weight_0_2.xxxx * sample_0; + sum_0 += weight_0_0.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; /* imaginary part of the complex multiply; every TAB uses its own weight (weight_<station>_<tab>), same as the .xxxx terms */ + sum_1 += weight_0_1.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; + sum_2 += weight_0_2.yyyy * (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; + + sum_0 += weight_1_0.xxxx * sample_1; + sum_1 += weight_1_1.xxxx * sample_1; + sum_2 += weight_1_2.xxxx * sample_1; + sum_0 += weight_1_0.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + sum_1 += weight_1_1.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + sum_2 += weight_1_2.yyyy * (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + + sum_0 += weight_2_0.xxxx * sample_2; + sum_1 += weight_2_1.xxxx * sample_2; + sum_2 += weight_2_2.xxxx * sample_2; + sum_0 += weight_2_0.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + sum_1 += weight_2_1.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + sum_2 += weight_2_2.yyyy * (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + + sum_0 += weight_3_0.xxxx * sample_3; + sum_1 += weight_3_1.xxxx * sample_3; + sum_2 += weight_3_2.xxxx * sample_3; + sum_0 += weight_3_0.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + sum_1 += weight_3_1.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + sum_2 += weight_3_2.yyyy * (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + + sum_0 += weight_4_0.xxxx * 
sample_4; + sum_1 += weight_4_1.xxxx * sample_4; + sum_2 += weight_4_2.xxxx * sample_4; + sum_0 += weight_4_0.yyyy * (float4) { -sample_4.y, sample_4.x, -sample_4.w, sample_4.z }; + sum_1 += weight_4_1.yyyy * (float4) { -sample_4.y, sample_4.x, -sample_4.w, sample_4.z }; + sum_2 += weight_4_2.yyyy * (float4) { -sample_4.y, sample_4.x, -sample_4.w, sample_4.z }; + + sum_0 += weight_5_0.xxxx * sample_5; + sum_1 += weight_5_1.xxxx * sample_5; + sum_2 += weight_5_2.xxxx * sample_5; + sum_0 += weight_5_0.yyyy * (float4) { -sample_5.y, sample_5.x, -sample_5.w, sample_5.z }; + sum_1 += weight_5_1.yyyy * (float4) { -sample_5.y, sample_5.x, -sample_5.w, sample_5.z }; + sum_2 += weight_5_2.yyyy * (float4) { -sample_5.y, sample_5.x, -sample_5.w, sample_5.z }; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (first_station + 6 < NR_STATIONS) { + local_sums[0][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_0; + local_sums[1][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_1; + local_sums[2][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_2; + } else if (time >= 0 && time < NR_SAMPLES_PER_INTEGRATION) { /* last station group stores the result; the loop overruns the time range, so bound the index on both sides */ + (*complexVoltages)[first_tab + 0][time][channel] = sum_0; + (*complexVoltages)[first_tab + 1][time][channel] = sum_1; + (*complexVoltages)[first_tab + 2][time][channel] = sum_2; + } + +#if 0 + local_complex_voltages[tab][local_time_or_channel] = sum; + + barrier(CLK_LOCAL_MEM_FENCE); + + (*complexVoltages)[transposed_tab][start_time + local_time][transposed_channel] = local_complex_voltages[transposed_tab][transposed_channel]; +#elif defined STOKES_I || defined STOKES_IQUV + float powerX = sum.x * sum.x + sum.y * sum.y; + float powerY = sum.z * sum.z + sum.w * sum.w; + + (*stokes)[tab][0][0][time_or_channel] = powerX + powerY; +#if defined STOKES_IQUV + (*stokes)[tab][1][0][time_or_channel] = powerX - powerY; + (*stokes)[tab][2][0][time_or_channel] = 2 * (sum.x * sum.z + sum.y * sum.w); + (*stokes)[tab][3][0][time_or_channel] = 2 * (sum.y * sum.z - sum.x * sum.w); +#endif +#endif + } +} diff 
--git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.bak b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.bak new file mode 100644 index 0000000000000000000000000000000000000000..07722c05093a0e1d2f05498b739e0e30849cb29e --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.bak @@ -0,0 +1,536 @@ +#define MAX(A,B) ((A)>(B)?(A):(B)) +#define NR_PASSES MAX((NR_STATIONS + 6) / 16, 1) // gives best results on GTX 680 +#define NR_STATIONS_PER_PASS ((NR_STATIONS + NR_PASSES - 1) / NR_PASSES) + +#if NR_STATIONS_PER_PASS > 32 +#error "need more passes to beam form this number of stations" +#endif + + +typedef __global float2 (*ComplexVoltagesType)[NR_TABS][NR_POLARIZATIONS][NR_CHANNELS][NR_TIMES_PER_BLOCK]; +typedef __global float4 (*BandPassCorrectedType)[NR_STATIONS][NR_CHANNELS][NR_TIMES_PER_BLOCK]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS]; + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *samplesPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + BandPassCorrectedType samples = (BandPassCorrectedType) samplesPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint minor_time = get_local_id(0); + uint tab = get_global_id(1); + uint channel = get_global_id(2); + + float4 sample; + __local float4 local_samples[NR_STATIONS_PER_PASS][16]; + +#pragma unroll + for (uint first_station = 0; first_station < NR_STATIONS; first_station += NR_STATIONS_PER_PASS) { +#if NR_STATIONS_PER_PASS >= 1 + float2 weight_00; + + if (first_station + 0 < NR_STATIONS) + weight_00 = (*weights)[first_station + 0][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 2 + float2 weight_01; + + if (first_station + 1 < NR_STATIONS) + weight_01 = (*weights)[first_station + 1][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 3 + float2 weight_02; + + if (first_station + 2 < NR_STATIONS) + weight_02 = (*weights)[first_station + 2][channel][tab]; 
+#endif + +#if NR_STATIONS_PER_PASS >= 4 + float2 weight_03; + + if (first_station + 3 < NR_STATIONS) + weight_03 = (*weights)[first_station + 3][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 5 + float2 weight_04; + + if (first_station + 4 < NR_STATIONS) + weight_04 = (*weights)[first_station + 4][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 6 + float2 weight_05; + + if (first_station + 5 < NR_STATIONS) + weight_05 = (*weights)[first_station + 5][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 7 + float2 weight_06; + + if (first_station + 6 < NR_STATIONS) + weight_06 = (*weights)[first_station + 6][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 8 + float2 weight_07; + + if (first_station + 7 < NR_STATIONS) + weight_07 = (*weights)[first_station + 7][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 9 + float2 weight_08; + + if (first_station + 8 < NR_STATIONS) + weight_08 = (*weights)[first_station + 8][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 10 + float2 weight_09; + + if (first_station + 9 < NR_STATIONS) + weight_09 = (*weights)[first_station + 9][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 11 + float2 weight_10; + + if (first_station + 10 < NR_STATIONS) + weight_10 = (*weights)[first_station + 10][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 12 + float2 weight_11; + + if (first_station + 11 < NR_STATIONS) + weight_11 = (*weights)[first_station + 11][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 13 + float2 weight_12; + + if (first_station + 12 < NR_STATIONS) + weight_12 = (*weights)[first_station + 12][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 14 + float2 weight_13; + + if (first_station + 13 < NR_STATIONS) + weight_13 = (*weights)[first_station + 13][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 15 + float2 weight_14; + + if (first_station + 14 < NR_STATIONS) + weight_14 = (*weights)[first_station + 14][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 16 + float2 
weight_15; + + if (first_station + 15 < NR_STATIONS) + weight_15 = (*weights)[first_station + 15][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 17 + float2 weight_16; + + if (first_station + 16 < NR_STATIONS) + weight_16 = (*weights)[first_station + 16][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 18 + float2 weight_17; + + if (first_station + 17 < NR_STATIONS) + weight_17 = (*weights)[first_station + 17][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 19 + float2 weight_18; + + if (first_station + 18 < NR_STATIONS) + weight_18 = (*weights)[first_station + 18][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 20 + float2 weight_19; + + if (first_station + 19 < NR_STATIONS) + weight_19 = (*weights)[first_station + 19][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 21 + float2 weight_20; + + if (first_station + 20 < NR_STATIONS) + weight_20 = (*weights)[first_station + 20][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 22 + float2 weight_21; + + if (first_station + 21 < NR_STATIONS) + weight_21 = (*weights)[first_station + 21][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 23 + float2 weight_22; + + if (first_station + 22 < NR_STATIONS) + weight_22 = (*weights)[first_station + 22][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 24 + float2 weight_23; + + if (first_station + 23 < NR_STATIONS) + weight_23 = (*weights)[first_station + 23][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 25 + float2 weight_24; + + if (first_station + 24 < NR_STATIONS) + weight_24 = (*weights)[first_station + 24][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 26 + float2 weight_25; + + if (first_station + 25 < NR_STATIONS) + weight_25 = (*weights)[first_station + 25][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 27 + float2 weight_26; + + if (first_station + 26 < NR_STATIONS) + weight_26 = (*weights)[first_station + 26][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 28 + float2 weight_27; + + if 
(first_station + 27 < NR_STATIONS) + weight_27 = (*weights)[first_station + 27][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 29 + float2 weight_28; + + if (first_station + 28 < NR_STATIONS) + weight_28 = (*weights)[first_station + 28][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 30 + float2 weight_29; + + if (first_station + 29 < NR_STATIONS) + weight_29 = (*weights)[first_station + 29][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 31 + float2 weight_30; + + if (first_station + 30 < NR_STATIONS) + weight_30 = (*weights)[first_station + 30][channel][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 32 + float2 weight_31; + + if (first_station + 31 < NR_STATIONS) + weight_31 = (*weights)[first_station + 31][channel][tab]; +#endif + + for (uint time = 0; time < NR_TIMES_PER_BLOCK; time += 16) { +#if 1 + for (uint i = get_local_id(0) + 16 * get_local_id(1); i < NR_STATIONS_PER_PASS * 16; i += get_local_size(0) * get_local_size(1)) { + uint t = i % 16; + uint s = i / 16; + + if (NR_TIMES_PER_BLOCK % 16 == 0 || time + t < NR_TIMES_PER_BLOCK) + if (NR_STATIONS % NR_STATIONS_PER_PASS == 0 || first_station + s < NR_STATIONS) + local_samples[0][i] = (*samples)[first_station + s][channel][time + t]; + } + + barrier(CLK_LOCAL_MEM_FENCE); +#endif + + /*for (uint t = 0; t < (NR_TIMES_PER_BLOCK % 16 == 0 ? 16 : min(16U, NR_TIMES_PER_BLOCK - time)); t ++)*/ { + float4 sum = first_station == 0 ? 
0 : (float4) ((*complexVoltages)[tab][0][channel][time + minor_time], (*complexVoltages)[tab][1][channel][time + minor_time]); + +#if NR_STATIONS_PER_PASS >= 1 + if (first_station + 1 < NR_STATIONS) { + sample = local_samples[0][minor_time]; + sum += weight_00.xxxx * sample; + sum += weight_00.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 2 + if (first_station + 2 < NR_STATIONS) { + sample = local_samples[1][minor_time]; + sum += weight_01.xxxx * sample; + sum += weight_01.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 3 + if (first_station + 3 < NR_STATIONS) { + sample = local_samples[2][minor_time]; + sum += weight_02.xxxx * sample; + sum += weight_02.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 4 + if (first_station + 4 < NR_STATIONS) { + sample = local_samples[3][minor_time]; + sum += weight_03.xxxx * sample; + sum += weight_03.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 5 + if (first_station + 5 < NR_STATIONS) { + sample = local_samples[4][minor_time]; + sum += weight_04.xxxx * sample; + sum += weight_04.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 6 + if (first_station + 6 < NR_STATIONS) { + sample = local_samples[5][minor_time]; + sum += weight_05.xxxx * sample; + sum += weight_05.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 7 + if (first_station + 7 < NR_STATIONS) { + sample = local_samples[6][minor_time]; + sum += weight_06.xxxx * sample; + sum += weight_06.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 8 + if (first_station + 8 < NR_STATIONS) { + sample = local_samples[7][minor_time]; + sum += weight_07.xxxx * sample; + sum += weight_07.yyyy * (float4) 
(-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 9 + if (first_station + 9 < NR_STATIONS) { + sample = local_samples[8][minor_time]; + sum += weight_08.xxxx * sample; + sum += weight_08.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 10 + if (first_station + 10 < NR_STATIONS) { + sample = local_samples[9][minor_time]; + sum += weight_09.xxxx * sample; + sum += weight_09.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 11 + if (first_station + 11 < NR_STATIONS) { + sample = local_samples[10][minor_time]; + sum += weight_10.xxxx * sample; + sum += weight_10.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 12 + if (first_station + 12 < NR_STATIONS) { + sample = local_samples[11][minor_time]; + sum += weight_11.xxxx * sample; + sum += weight_11.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 13 + if (first_station + 13 < NR_STATIONS) { + sample = local_samples[12][minor_time]; + sum += weight_12.xxxx * sample; + sum += weight_12.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 14 + if (first_station + 14 < NR_STATIONS) { + sample = local_samples[13][minor_time]; + sum += weight_13.xxxx * sample; + sum += weight_13.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 15 + if (first_station + 15 < NR_STATIONS) { + sample = local_samples[14][minor_time]; + sum += weight_14.xxxx * sample; + sum += weight_14.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 16 + if (first_station + 15 < NR_STATIONS) { + sample = local_samples[15][minor_time]; + sum += weight_15.xxxx * sample; + sum += weight_15.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + 
+#if NR_STATIONS_PER_PASS >= 17 + if (first_station + 16 < NR_STATIONS) { + sample = local_samples[16][minor_time]; + sum += weight_16.xxxx * sample; + sum += weight_16.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 18 + if (first_station + 17 < NR_STATIONS) { + sample = local_samples[17][minor_time]; + sum += weight_17.xxxx * sample; + sum += weight_17.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 19 + if (first_station + 18 < NR_STATIONS) { + sample = local_samples[18][minor_time]; + sum += weight_18.xxxx * sample; + sum += weight_18.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 20 + if (first_station + 19 < NR_STATIONS) { + sample = local_samples[19][minor_time]; + sum += weight_19.xxxx * sample; + sum += weight_19.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 21 + if (first_station + 20 < NR_STATIONS) { + sample = local_samples[20][minor_time]; + sum += weight_20.xxxx * sample; + sum += weight_20.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 22 + if (first_station + 21 < NR_STATIONS) { + sample = local_samples[21][minor_time]; + sum += weight_21.xxxx * sample; + sum += weight_21.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 23 + if (first_station + 22 < NR_STATIONS) { + sample = local_samples[22][minor_time]; + sum += weight_22.xxxx * sample; + sum += weight_22.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 24 + if (first_station + 23 < NR_STATIONS) { + sample = local_samples[23][minor_time]; + sum += weight_23.xxxx * sample; + sum += weight_23.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 25 + if (first_station + 
25 < NR_STATIONS) { + sample = local_samples[24][minor_time]; + sum += weight_24.xxxx * sample; + sum += weight_24.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 26 + if (first_station + 25 < NR_STATIONS) { + sample = local_samples[25][minor_time]; + sum += weight_25.xxxx * sample; + sum += weight_25.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 27 + if (first_station + 26 < NR_STATIONS) { + sample = local_samples[26][minor_time]; + sum += weight_26.xxxx * sample; + sum += weight_26.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 28 + if (first_station + 27 < NR_STATIONS) { + sample = local_samples[27][minor_time]; + sum += weight_27.xxxx * sample; + sum += weight_27.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 29 + if (first_station + 28 < NR_STATIONS) { + sample = local_samples[28][minor_time]; + sum += weight_28.xxxx * sample; + sum += weight_28.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 30 + if (first_station + 29 < NR_STATIONS) { + sample = local_samples[29][minor_time]; + sum += weight_29.xxxx * sample; + sum += weight_29.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 31 + if (first_station + 30 < NR_STATIONS) { + sample = local_samples[30][minor_time]; + sum += weight_30.xxxx * sample; + sum += weight_30.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + +#if NR_STATIONS_PER_PASS >= 32 + if (first_station + 31 < NR_STATIONS) { + sample = local_samples[31][minor_time]; + sum += weight_31.xxxx * sample; + sum += weight_31.yyyy * (float4) (-sample.y, sample.x, -sample.w, sample.z); + } +#endif + + (*complexVoltages)[tab][0][channel][time + minor_time] = sum.xy; + (*complexVoltages)[tab][1][channel][time 
+ minor_time] = sum.zw;
+      }
+
+      barrier(CLK_LOCAL_MEM_FENCE);
+    }
+  }
+}
diff --git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.not b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.not
new file mode 100644
index 0000000000000000000000000000000000000000..ff5e84018cbd92de14bd35481f16987c2f294d41
--- /dev/null
+++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.not
@@ -0,0 +1,148 @@
+#define NR_CHANNELS_PER_BLOCK 2
+// Pointer-to-array typedefs: each flat __global buffer is cast once and then indexed as (*ptr)[i][j][k].
+// The other NR_* sizes are not defined in this file -- presumably supplied as build options (TODO confirm).
+typedef __global float4 (*ComplexVoltagesType)[NR_TABS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS];
+//typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_INTEGRATION];
+typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_SAMPLES_PER_INTEGRATION][NR_CHANNELS];
+typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS];
+// Complex product of two (re, im) pairs stored in .x/.y. Not called anywhere in this file.
+float2 cmul(float2 a, float2 b)
+{
+  return (float2) { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x };
+}
+// complexVoltages: each work-item accumulates 3 TABs (first_tab .. first_tab + 2) over 6 stations
+// (first_station .. first_station + 5); partial sums travel between station groups via local_sums.
+__kernel void complexVoltages(__global void *complexVoltagesPtr, // output, accessed as ComplexVoltagesType
+                              __global const void *correctedDataPtr, // input samples, accessed as CorrectedDataType
+                              __global const void *weightsPtr) // complex weights, accessed as WeightsType
+{
+  ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr;
+  CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr;
+  WeightsType weights = (WeightsType) weightsPtr;
+
+  uint first_tab = 3 * get_local_id(0);
+  uint channel = get_global_id(1);
+  uint first_station = 6 * get_local_id(2);
+  // Preload the 6 stations x 3 TABs weights used by this work-item.
+  float2 weight_0_0 = (*weights)[first_station + 0][channel][first_tab + 0];
+  float2 weight_0_1 = (*weights)[first_station + 0][channel][first_tab + 1];
+  float2 weight_0_2 = (*weights)[first_station + 0][channel][first_tab + 2];
+  float2 weight_1_0 = (*weights)[first_station + 1][channel][first_tab + 0];
+  float2 weight_1_1 = (*weights)[first_station + 1][channel][first_tab + 1];
+  float2 weight_1_2 = (*weights)[first_station + 1][channel][first_tab + 2];
+  float2 weight_2_0 = (*weights)[first_station + 2][channel][first_tab + 0];
+  float2 weight_2_1 = (*weights)[first_station + 2][channel][first_tab + 1];
+  float2 weight_2_2 = (*weights)[first_station + 2][channel][first_tab + 2];
+  float2 weight_3_0 = (*weights)[first_station + 3][channel][first_tab + 0];
+  float2 weight_3_1 = (*weights)[first_station + 3][channel][first_tab + 1];
+  float2 weight_3_2 = (*weights)[first_station + 3][channel][first_tab + 2];
+  float2 weight_4_0 = (*weights)[first_station + 4][channel][first_tab + 0];
+  float2 weight_4_1 = (*weights)[first_station + 4][channel][first_tab + 1];
+  float2 weight_4_2 = (*weights)[first_station + 4][channel][first_tab + 2];
+  float2 weight_5_0 = (*weights)[first_station + 5][channel][first_tab + 0];
+  float2 weight_5_1 = (*weights)[first_station + 5][channel][first_tab + 1];
+  float2 weight_5_2 = (*weights)[first_station + 5][channel][first_tab + 2];
+  // Hand-over slots between station groups; the last group stores to complexVoltages instead of local_sums.
+  __local float4 local_sums[3][NR_STATIONS / 6 - 1][NR_CHANNELS_PER_BLOCK][NR_TABS / 3];
+  // NOTE(review): the number of barrier() calls executed here depends on get_local_id(2); OpenCL expects every work-item of a work-group to reach each barrier -- confirm this staggering is valid.
+  for (int i = 0; i < 2 * get_local_id(2); i ++)
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+  for (int time = 0; time < (int) NR_SAMPLES_PER_INTEGRATION; time ++) {
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    float4 sum_0, sum_1, sum_2;
+    // The first station group starts from zero; the others continue from the previous group's partial sums.
+    if (first_station == 0) {
+      sum_0 = sum_1 = sum_2 = (float4) { 0, 0, 0, 0 };
+    } else {
+      sum_0 = local_sums[0][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)];
+      sum_1 = local_sums[1][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)];
+      sum_2 = local_sums[2][get_local_id(2) - 1][get_local_id(1)][get_local_id(0)];
+    }
+
+    float4 sample_0 = (*correctedData)[first_station + 0][time][channel];
+    float4 sample_1 = (*correctedData)[first_station + 1][time][channel];
+    float4 sample_2 = (*correctedData)[first_station + 2][time][channel];
+    float4 sample_3 = (*correctedData)[first_station + 3][time][channel];
+    float4 sample_4 = (*correctedData)[first_station + 4][time][channel];
+    float4 sample_5 = (*correctedData)[first_station + 5][time][channel];
+    // Complex multiply-accumulate: w.x * s + w.y * (i * s), where i * s = (-s.y, s.x, -s.w, s.z) for the two complex values packed in the float4.
+    sum_0 += weight_0_0.xxxx * sample_0;
+    sum_1 += weight_0_1.xxxx * sample_0;
+    sum_2 += weight_0_2.xxxx * sample_0;
+    float4 sample_0_tr = (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z };
+    sum_0 += weight_0_0.yyyy * sample_0_tr;
+    sum_1 += weight_0_1.yyyy * sample_0_tr;
+    sum_2 += weight_0_2.yyyy * sample_0_tr;
+
+    sum_0 += weight_1_0.xxxx * sample_1;
+    sum_1 += weight_1_1.xxxx * sample_1;
+    sum_2 += weight_1_2.xxxx * sample_1;
+    float4 sample_1_tr = (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z };
+    sum_0 += weight_1_0.yyyy * sample_1_tr;
+    sum_1 += weight_1_1.yyyy * sample_1_tr;
+    sum_2 += weight_1_2.yyyy * sample_1_tr;
+
+    sum_0 += weight_2_0.xxxx * sample_2;
+    sum_1 += weight_2_1.xxxx * sample_2;
+    sum_2 += weight_2_2.xxxx * sample_2;
+    float4 sample_2_tr = (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z };
+    sum_0 += weight_2_0.yyyy * sample_2_tr;
+    sum_1 += weight_2_1.yyyy * sample_2_tr;
+    sum_2 += weight_2_2.yyyy * sample_2_tr;
+
+    sum_0 += weight_3_0.xxxx * sample_3;
+    sum_1 += weight_3_1.xxxx * sample_3;
+    sum_2 += weight_3_2.xxxx * sample_3;
+    float4 sample_3_tr = (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z };
+    sum_0 += weight_3_0.yyyy * sample_3_tr;
+    sum_1 += weight_3_1.yyyy * sample_3_tr;
+    sum_2 += weight_3_2.yyyy * sample_3_tr;
+
+    sum_0 += weight_4_0.xxxx * sample_4;
+    sum_1 += weight_4_1.xxxx * sample_4;
+    sum_2 += weight_4_2.xxxx * sample_4;
+    float4 sample_4_tr = (float4) { -sample_4.y, sample_4.x, -sample_4.w, sample_4.z };
+    sum_0 += weight_4_0.yyyy * sample_4_tr;
+    sum_1 += weight_4_1.yyyy * sample_4_tr;
+    sum_2 += weight_4_2.yyyy * sample_4_tr;
+
+    sum_0 += weight_5_0.xxxx * sample_5;
+    sum_1 += weight_5_1.xxxx * sample_5;
+    sum_2 += weight_5_2.xxxx * sample_5;
+    float4 sample_5_tr = (float4) { -sample_5.y, sample_5.x, -sample_5.w, sample_5.z };
+    sum_0 += weight_5_0.yyyy * sample_5_tr;
+    sum_1 += weight_5_1.yyyy * sample_5_tr;
+    sum_2 += weight_5_2.yyyy * sample_5_tr;
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+    // Pass the partial sums on to the next station group, or store the finished sums.
+    if (first_station + 6 < NR_STATIONS) {
+      local_sums[0][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_0;
+
local_sums[1][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_1;
+      local_sums[2][get_local_id(2)][get_local_id(1)][get_local_id(0)] = sum_2;
+    } else {
+      (*complexVoltages)[first_tab + 0][time][channel] = sum_0;
+      (*complexVoltages)[first_tab + 1][time][channel] = sum_1;
+      (*complexVoltages)[first_tab + 2][time][channel] = sum_2;
+    }
+
+    for (int i = 0; i < 2 * (get_local_size(2) - 1 - get_local_id(2)); i ++)
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+#if 0
+#if defined STOKES_I || defined STOKES_IQUV
+    float powerX = sum.x * sum.x + sum.y * sum.y;
+    float powerY = sum.z * sum.z + sum.w * sum.w;
+
+    (*stokes)[tab][0][0][time_or_channel] = powerX + powerY;
+#if defined STOKES_IQUV
+    (*stokes)[tab][1][0][time_or_channel] = powerX - powerY;
+    (*stokes)[tab][2][0][time_or_channel] = 2 * (sum.x * sum.z + sum.y * sum.w);
+    (*stokes)[tab][3][0][time_or_channel] = 2 * (sum.y * sum.z - sum.x * sum.w);
+#endif
+#endif
+#endif
+  }
+}
diff --git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.ok b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.ok
new file mode 100644
index 0000000000000000000000000000000000000000..82eba8d0e89e844943f12e801c780347f145bd8f
--- /dev/null
+++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.ok
@@ -0,0 +1,536 @@
+#define MAX(A,B) ((A)>(B)?(A):(B))
+#define NR_PASSES MAX((NR_STATIONS + 6) / 16, 1) // gives best results on GTX 680
+#define NR_STATIONS_PER_PASS ((NR_STATIONS + NR_PASSES - 1) / NR_PASSES)
+// NR_STATIONS_PER_PASS = ceil(NR_STATIONS / NR_PASSES); the unrolled code below handles at most 32 stations per pass.
+#if NR_STATIONS_PER_PASS > 32
+#error "need more passes to beam form this number of stations"
+#endif
+// Pointer-to-array typedefs: each flat __global buffer is cast once and then indexed as (*ptr)[i][j]...
+// NR_STATIONS, NR_CHANNELS, NR_TIMES_PER_BLOCK, NR_TABS and NR_POLARIZATIONS are not defined here -- presumably build options (TODO confirm).
+typedef __global float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS][NR_POLARIZATIONS];
+typedef __global float4 (*BandPassCorrectedType)[NR_STATIONS][NR_CHANNELS][NR_TIMES_PER_BLOCK];
+typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS];
+// complexVoltages: for one (channel, TAB, polarization) and every time sample of the block, accumulate
+// weight[station] * sample[station] (complex multiply-accumulate) over all stations, NR_STATIONS_PER_PASS stations at a time.
+__kernel void complexVoltages(__global void *complexVoltagesPtr, // result; also read back as the running sum in passes after the first
+                              __global const void *samplesPtr,     // input samples, accessed as BandPassCorrectedType
+                              __global const void *weightsPtr)     // complex weights per station/channel/TAB, accessed as WeightsType
+{
+  ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr;
+  BandPassCorrectedType samples = (BandPassCorrectedType) samplesPtr;
+  WeightsType weights = (WeightsType) weightsPtr;
+  // NOTE(review): the staging loop below strides by NR_TABS * NR_POLARIZATIONS, so the work-group is assumed to be (NR_POLARIZATIONS, NR_TABS, 1) -- confirm in the host code.
+  uint pol = get_local_id(0);
+  uint tab = get_local_id(1);
+  uint channel = get_global_id(2);
+  // Staging buffer for 16 time samples of every station in the current pass; the union lets the same storage be written as float4 and read per polarization as float2.
+  float2 sample;
+  __local union {
+    float2 samples[NR_STATIONS_PER_PASS][16][NR_POLARIZATIONS];
+    float4 samples4[NR_STATIONS_PER_PASS][16];
+  } _local;
+  // Stations are processed in passes; passes after the first add to the partial sums already stored in complexVoltages.
+#pragma unroll
+  for (uint first_station = 0; first_station < NR_STATIONS; first_station += NR_STATIONS_PER_PASS) {
+#if NR_STATIONS_PER_PASS >= 1
+    float2 weight_00;
+
+    if (first_station + 0 < NR_STATIONS)
+      weight_00 = (*weights)[first_station + 0][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 2
+    float2 weight_01;
+
+    if (first_station + 1 < NR_STATIONS)
+      weight_01 = (*weights)[first_station + 1][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 3
+    float2 weight_02;
+
+    if (first_station + 2 < NR_STATIONS)
+      weight_02 = (*weights)[first_station + 2][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 4
+    float2 weight_03;
+
+    if (first_station + 3 < NR_STATIONS)
+      weight_03 = (*weights)[first_station + 3][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 5
+    float2 weight_04;
+
+    if (first_station + 4 < NR_STATIONS)
+      weight_04 = (*weights)[first_station + 4][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 6
+    float2 weight_05;
+
+    if (first_station + 5 < NR_STATIONS)
+      weight_05 = (*weights)[first_station + 5][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 7
+    float2 weight_06;
+
+    if (first_station + 6 < NR_STATIONS)
+      weight_06 = (*weights)[first_station + 6][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 8
+    float2 weight_07;
+
+    if (first_station + 7 < NR_STATIONS)
+      weight_07 = (*weights)[first_station + 7][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 9
+    float2 weight_08;
+
+    if (first_station + 8 < NR_STATIONS)
+      weight_08 = (*weights)[first_station + 8][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 10
+    float2 weight_09;
+
+    if (first_station + 9 < NR_STATIONS)
+      weight_09 = (*weights)[first_station + 9][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 11
+    float2 weight_10;
+
+    if (first_station + 10 < NR_STATIONS)
+      weight_10 = (*weights)[first_station + 10][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 12
+    float2 weight_11;
+
+    if (first_station + 11 < NR_STATIONS)
+      weight_11 = (*weights)[first_station + 11][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 13
+    float2 weight_12;
+
+    if (first_station + 12 < NR_STATIONS)
+      weight_12 = (*weights)[first_station + 12][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 14
+    float2 weight_13;
+
+    if (first_station + 13 < NR_STATIONS)
+      weight_13 = (*weights)[first_station + 13][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 15
+    float2 weight_14;
+
+    if (first_station + 14 < NR_STATIONS)
+      weight_14 = (*weights)[first_station + 14][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 16
+    float2 weight_15;
+
+    if (first_station + 15 < NR_STATIONS)
+      weight_15 = (*weights)[first_station + 15][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 17
+    float2 weight_16;
+
+    if (first_station + 16 < NR_STATIONS)
+      weight_16 = (*weights)[first_station + 16][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 18
+    float2 weight_17;
+
+    if (first_station + 17 < NR_STATIONS)
+      weight_17 = (*weights)[first_station + 17][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 19
+    float2 weight_18;
+
+    if (first_station + 18 < NR_STATIONS)
+      weight_18 = (*weights)[first_station + 18][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 20
+    float2 weight_19;
+
+    if (first_station + 19 < NR_STATIONS)
+      weight_19 = (*weights)[first_station + 19][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 21
+    float2 weight_20;
+
+    if (first_station + 20 < NR_STATIONS)
+      weight_20 = (*weights)[first_station + 20][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 22
+    float2 weight_21;
+
+    if (first_station + 21 < NR_STATIONS)
+      weight_21 = (*weights)[first_station + 21][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 23
+    float2 weight_22;
+
+    if (first_station + 22 < NR_STATIONS)
+      weight_22 = (*weights)[first_station + 22][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 24
+    float2 weight_23;
+
+    if (first_station + 23 < NR_STATIONS)
+      weight_23 = (*weights)[first_station + 23][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 25
+    float2 weight_24;
+
+    if (first_station + 24 < NR_STATIONS)
+      weight_24 = (*weights)[first_station + 24][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 26
+    float2 weight_25;
+
+    if (first_station + 25 < NR_STATIONS)
+      weight_25 = (*weights)[first_station + 25][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 27
+    float2 weight_26;
+
+    if (first_station + 26 < NR_STATIONS)
+      weight_26 = (*weights)[first_station + 26][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 28
+    float2 weight_27;
+
+    if (first_station + 27 < NR_STATIONS)
+      weight_27 = (*weights)[first_station + 27][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 29
+    float2 weight_28;
+
+    if (first_station + 28 < NR_STATIONS)
+      weight_28 = (*weights)[first_station + 28][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 30
+    float2 weight_29;
+
+    if (first_station + 29 < NR_STATIONS)
+      weight_29 = (*weights)[first_station + 29][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 31
+    float2 weight_30;
+
+    if (first_station + 30 < NR_STATIONS)
+      weight_30 = (*weights)[first_station + 30][channel][tab];
+#endif
+
+#if NR_STATIONS_PER_PASS >= 32
+    float2 weight_31;
+
+    if (first_station + 31 < NR_STATIONS)
+      weight_31 = (*weights)[first_station + 31][channel][tab];
+#endif
+
+    for (uint time = 0; time < NR_TIMES_PER_BLOCK; time += 16) {
+      for (uint i = get_local_id(0) + NR_POLARIZATIONS * get_local_id(1); i < NR_STATIONS_PER_PASS * 16; i += NR_TABS * NR_POLARIZATIONS) {
+        uint t = i % 16;
+        uint s = i / 16;
+
+        if (NR_TIMES_PER_BLOCK % 16 == 0 || time + t < NR_TIMES_PER_BLOCK)
+          if (NR_STATIONS % NR_STATIONS_PER_PASS == 0 || first_station + s < NR_STATIONS)
+            _local.samples4[0][i] = convert_float4((*samples)[first_station + s][channel][time + t]);
+      }
+      // Make the staged samples visible to all work-items before they are consumed.
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      for (uint t = 0; t < (NR_TIMES_PER_BLOCK % 16 == 0 ? 16 : min(16U, NR_TIMES_PER_BLOCK - time)); t ++) {
+        float2 sum = first_station == 0 ? 0 : (*complexVoltages)[channel][time + t][tab][pol];
+        // Station slot k takes part iff first_station + k < NR_STATIONS -- the same guard as its weight load above, so the last station of a pass is never dropped.
+#if NR_STATIONS_PER_PASS >= 1
+        if (first_station + 0 < NR_STATIONS) {
+          sample = _local.samples[ 0][t][pol];
+          sum += weight_00.xx * sample;
+          sum += weight_00.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 2
+        if (first_station + 1 < NR_STATIONS) {
+          sample = _local.samples[ 1][t][pol];
+          sum += weight_01.xx * sample;
+          sum += weight_01.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 3
+        if (first_station + 2 < NR_STATIONS) {
+          sample = _local.samples[ 2][t][pol];
+          sum += weight_02.xx * sample;
+          sum += weight_02.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 4
+        if (first_station + 3 < NR_STATIONS) {
+          sample = _local.samples[ 3][t][pol];
+          sum += weight_03.xx * sample;
+          sum += weight_03.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 5
+        if (first_station + 4 < NR_STATIONS) {
+          sample = _local.samples[ 4][t][pol];
+          sum += weight_04.xx * sample;
+          sum += weight_04.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 6
+        if (first_station + 5 < NR_STATIONS) {
+          sample = _local.samples[ 5][t][pol];
+          sum += weight_05.xx * sample;
+          sum += weight_05.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 7
+        if (first_station + 6 < NR_STATIONS) {
+          sample = _local.samples[ 6][t][pol];
+          sum += weight_06.xx * sample;
+          sum += weight_06.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 8
+        if (first_station + 7 < NR_STATIONS) {
+          sample = _local.samples[ 7][t][pol];
+          sum += weight_07.xx * sample;
+          sum += weight_07.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 9
+        if (first_station + 8 < NR_STATIONS) {
+          sample = _local.samples[ 8][t][pol];
+          sum += weight_08.xx * sample;
+          sum += weight_08.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 10
+        if (first_station + 9 < NR_STATIONS) {
+          sample = _local.samples[ 9][t][pol];
+          sum += weight_09.xx * sample;
+          sum += weight_09.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 11
+        if (first_station + 10 < NR_STATIONS) {
+          sample = _local.samples[10][t][pol];
+          sum += weight_10.xx * sample;
+          sum += weight_10.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 12
+        if (first_station + 11 < NR_STATIONS) {
+          sample = _local.samples[11][t][pol];
+          sum += weight_11.xx * sample;
+          sum += weight_11.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 13
+        if (first_station + 12 < NR_STATIONS) {
+          sample = _local.samples[12][t][pol];
+          sum += weight_12.xx * sample;
+          sum += weight_12.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 14
+        if (first_station + 13 < NR_STATIONS) {
+          sample = _local.samples[13][t][pol];
+          sum += weight_13.xx * sample;
+          sum += weight_13.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 15
+        if (first_station + 14 < NR_STATIONS) {
+          sample = _local.samples[14][t][pol];
+          sum += weight_14.xx * sample;
+          sum += weight_14.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 16
+        if (first_station + 15 < NR_STATIONS) {
+          sample = _local.samples[15][t][pol];
+          sum += weight_15.xx * sample;
+          sum += weight_15.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 17
+        if (first_station + 16 < NR_STATIONS) {
+          sample = _local.samples[16][t][pol];
+          sum += weight_16.xx * sample;
+          sum += weight_16.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 18
+        if (first_station + 17 < NR_STATIONS) {
+          sample = _local.samples[17][t][pol];
+          sum += weight_17.xx * sample;
+          sum += weight_17.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 19
+        if (first_station + 18 < NR_STATIONS) {
+          sample = _local.samples[18][t][pol];
+          sum += weight_18.xx * sample;
+          sum += weight_18.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 20
+        if (first_station + 19 < NR_STATIONS) {
+          sample = _local.samples[19][t][pol];
+          sum += weight_19.xx * sample;
+          sum += weight_19.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 21
+        if (first_station + 20 < NR_STATIONS) {
+          sample = _local.samples[20][t][pol];
+          sum += weight_20.xx * sample;
+          sum += weight_20.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 22
+        if (first_station + 21 < NR_STATIONS) {
+          sample = _local.samples[21][t][pol];
+          sum += weight_21.xx * sample;
+          sum += weight_21.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 23
+        if (first_station + 22 < NR_STATIONS) {
+          sample = _local.samples[22][t][pol];
+          sum += weight_22.xx * sample;
+          sum += weight_22.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 24
+        if (first_station + 23 < NR_STATIONS) {
+          sample = _local.samples[23][t][pol];
+          sum += weight_23.xx * sample;
+          sum += weight_23.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 25
+        if (first_station + 24 < NR_STATIONS) {
+          sample = _local.samples[24][t][pol];
+          sum += weight_24.xx * sample;
+          sum += weight_24.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 26
+        if (first_station + 25 < NR_STATIONS) {
+          sample = _local.samples[25][t][pol];
+          sum += weight_25.xx * sample;
+          sum += weight_25.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 27
+        if (first_station + 26 < NR_STATIONS) {
+          sample = _local.samples[26][t][pol];
+          sum += weight_26.xx * sample;
+          sum += weight_26.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 28
+        if (first_station + 27 < NR_STATIONS) {
+          sample = _local.samples[27][t][pol];
+          sum += weight_27.xx * sample;
+          sum += weight_27.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 29
+        if (first_station + 28 < NR_STATIONS) {
+          sample = _local.samples[28][t][pol];
+          sum += weight_28.xx * sample;
+          sum += weight_28.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 30
+        if (first_station + 29 < NR_STATIONS) {
+          sample = _local.samples[29][t][pol];
+          sum += weight_29.xx * sample;
+          sum += weight_29.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 31
+        if (first_station + 30 < NR_STATIONS) {
+          sample = _local.samples[30][t][pol];
+          sum += weight_30.xx * sample;
+          sum += weight_30.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+#if NR_STATIONS_PER_PASS >= 32
+        if (first_station + 31 < NR_STATIONS) {
+          sample = _local.samples[31][t][pol];
+          sum += weight_31.xx * sample;
+          sum += weight_31.yy * (float2) (-sample.y, sample.x);
+        }
+#endif
+
+        (*complexVoltages)[channel][time + t][tab][pol] = sum;
+      }
+      // Do not refill the staging buffer until every work-item has finished reading it.
+      barrier(CLK_LOCAL_MEM_FENCE);
+    }
+  }
+}
diff --git a/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.orig b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.orig
new file mode 100644
index 0000000000000000000000000000000000000000..cdaceafe25037428ee704b505003e0591a34c220
--- /dev/null
+++ b/RTCP/GPUProc/src/BeamFormer/BeamFormer.cl.orig
@@ -0,0 +1,149 @@
+typedef __global float4 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS];
+//typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_TIMES_PER_BLOCK];
+typedef __global float4
(*CorrectedDataType)[NR_STATIONS][NR_TIMES_PER_BLOCK][NR_CHANNELS]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_CHANNELS][NR_TABS]; + +float2 cmul(float2 a, float2 b) +{ + return (float2) { a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x }; +} + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *correctedDataPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint first_tab = 3 * get_local_id(0); + uint first_station = 6 * get_local_id(1); + uint channel = get_global_id(2); + + bool lastGroupOfStations = first_station + 6 == NR_STATIONS; + + float2 weight_0_0 = (*weights)[first_station + 0][channel][first_tab + 0]; + float2 weight_0_1 = (*weights)[first_station + 0][channel][first_tab + 1]; + float2 weight_0_2 = (*weights)[first_station + 0][channel][first_tab + 2]; + float2 weight_1_0 = (*weights)[first_station + 1][channel][first_tab + 0]; + float2 weight_1_1 = (*weights)[first_station + 1][channel][first_tab + 1]; + float2 weight_1_2 = (*weights)[first_station + 1][channel][first_tab + 2]; + float2 weight_2_0 = (*weights)[first_station + 2][channel][first_tab + 0]; + float2 weight_2_1 = (*weights)[first_station + 2][channel][first_tab + 1]; + float2 weight_2_2 = (*weights)[first_station + 2][channel][first_tab + 2]; + float2 weight_3_0 = (*weights)[first_station + 3][channel][first_tab + 0]; + float2 weight_3_1 = (*weights)[first_station + 3][channel][first_tab + 1]; + float2 weight_3_2 = (*weights)[first_station + 3][channel][first_tab + 2]; + float2 weight_4_0 = (*weights)[first_station + 4][channel][first_tab + 0]; + float2 weight_4_1 = (*weights)[first_station + 4][channel][first_tab + 1]; + float2 weight_4_2 = (*weights)[first_station + 4][channel][first_tab + 2]; + float2 weight_5_0 = (*weights)[first_station + 
5][channel][first_tab + 0]; + float2 weight_5_1 = (*weights)[first_station + 5][channel][first_tab + 1]; + float2 weight_5_2 = (*weights)[first_station + 5][channel][first_tab + 2]; + + __local float4 local_sums[3][NR_STATIONS / 6][NR_TABS / 3]; + float4 sample_0, sample_1, sample_2, sample_3, sample_4, sample_5; + float4 sum_0, sum_1, sum_2; + + if (first_station == 0) { + local_sums[0][0][get_local_id(0)] = (float4) { 0, 0, 0, 0 }; + local_sums[1][0][get_local_id(0)] = (float4) { 0, 0, 0, 0 }; + local_sums[2][0][get_local_id(0)] = (float4) { 0, 0, 0, 0 }; + } + + for (int time = 0 - get_local_id(1); time < (int) (NR_TIMES_PER_BLOCK + NR_STATIONS / 6 - 1 - get_local_id(1)); time ++) { + bool validTime = time >= 0 && time < NR_TIMES_PER_BLOCK; + + if (validTime) { + sample_0 = (*correctedData)[first_station + 0][time][channel]; + sample_1 = (*correctedData)[first_station + 1][time][channel]; + sample_2 = (*correctedData)[first_station + 2][time][channel]; + sample_3 = (*correctedData)[first_station + 3][time][channel]; + sample_4 = (*correctedData)[first_station + 4][time][channel]; + sample_5 = (*correctedData)[first_station + 5][time][channel]; + + sum_0 = local_sums[0][get_local_id(1)][get_local_id(0)]; + sum_1 = local_sums[1][get_local_id(1)][get_local_id(0)]; + sum_2 = local_sums[2][get_local_id(1)][get_local_id(0)]; + + sum_0 += weight_0_0.xxxx * sample_0; + sum_1 += weight_0_1.xxxx * sample_0; + sum_2 += weight_0_2.xxxx * sample_0; + float4 sample_0_tr = (float4) { -sample_0.y, sample_0.x, -sample_0.w, sample_0.z }; + sum_0 += weight_0_0.yyyy * sample_0_tr; + sum_1 += weight_0_1.yyyy * sample_0_tr; + sum_2 += weight_0_2.yyyy * sample_0_tr; + + sum_0 += weight_1_0.xxxx * sample_1; + sum_1 += weight_1_1.xxxx * sample_1; + sum_2 += weight_1_2.xxxx * sample_1; + float4 sample_1_tr = (float4) { -sample_1.y, sample_1.x, -sample_1.w, sample_1.z }; + sum_0 += weight_1_0.yyyy * sample_1_tr; + sum_1 += weight_1_1.yyyy * sample_1_tr; + sum_2 += weight_1_2.yyyy * 
sample_1_tr; + + sum_0 += weight_2_0.xxxx * sample_2; + sum_1 += weight_2_1.xxxx * sample_2; + sum_2 += weight_2_2.xxxx * sample_2; + float4 sample_2_tr = (float4) { -sample_2.y, sample_2.x, -sample_2.w, sample_2.z }; + sum_0 += weight_2_0.yyyy * sample_2_tr; + sum_1 += weight_2_1.yyyy * sample_2_tr; + sum_2 += weight_2_2.yyyy * sample_2_tr; + + sum_0 += weight_3_0.xxxx * sample_3; + sum_1 += weight_3_1.xxxx * sample_3; + sum_2 += weight_3_2.xxxx * sample_3; + float4 sample_3_tr = (float4) { -sample_3.y, sample_3.x, -sample_3.w, sample_3.z }; + sum_0 += weight_3_0.yyyy * sample_3_tr; + sum_1 += weight_3_1.yyyy * sample_3_tr; + sum_2 += weight_3_2.yyyy * sample_3_tr; + + sum_0 += weight_4_0.xxxx * sample_4; + sum_1 += weight_4_1.xxxx * sample_4; + sum_2 += weight_4_2.xxxx * sample_4; + float4 sample_4_tr = (float4) { -sample_4.y, sample_4.x, -sample_4.w, sample_4.z }; + sum_0 += weight_4_0.yyyy * sample_4_tr; + sum_1 += weight_4_1.yyyy * sample_4_tr; + sum_2 += weight_4_2.yyyy * sample_4_tr; + + sum_0 += weight_5_0.xxxx * sample_5; + sum_1 += weight_5_1.xxxx * sample_5; + sum_2 += weight_5_2.xxxx * sample_5; + float4 sample_5_tr = (float4) { -sample_5.y, sample_5.x, -sample_5.w, sample_5.z }; + sum_0 += weight_5_0.yyyy * sample_5_tr; + sum_1 += weight_5_1.yyyy * sample_5_tr; + sum_2 += weight_5_2.yyyy * sample_5_tr; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (validTime) { + if (lastGroupOfStations) { + (*complexVoltages)[channel][time][first_tab + 0] = sum_0; + (*complexVoltages)[channel][time][first_tab + 1] = sum_1; + (*complexVoltages)[channel][time][first_tab + 2] = sum_2; + } else { + local_sums[0][get_local_id(1) + 1][get_local_id(0)] = sum_0; + local_sums[1][get_local_id(1) + 1][get_local_id(0)] = sum_1; + local_sums[2][get_local_id(1) + 1][get_local_id(0)] = sum_2; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + +#if 0 +#if defined STOKES_I || defined STOKES_IQUV + float powerX = sum.x * sum.x + sum.y * sum.y; + float powerY = sum.z * sum.z + sum.w * sum.w; + 
+ (*stokes)[tab][0][0][time_or_channel] = powerX + powerY; +#if defined STOKES_IQUV + (*stokes)[tab][1][0][time_or_channel] = powerX - powerY; + (*stokes)[tab][2][0][time_or_channel] = 2 * (sum.x * sum.z + sum.y * sum.w); + (*stokes)[tab][3][0][time_or_channel] = 2 * (sum.y * sum.z - sum.x * sum.w); +#endif +#endif +#endif + } +} diff --git a/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl new file mode 100644 index 0000000000000000000000000000000000000000..ba32ea6d37101f2fd6d36924e4fdded7cf0fcdfd --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl @@ -0,0 +1,130 @@ +__kernel void computeStokes(__global void *restrict stokesDataPtr, + __global const void *restrict complexVoltagesPtr) +{ + typedef __global float (*StokesType)[NR_TABS][NR_COHERENT_STOKES][NR_SAMPLES_PER_CHANNEL / COHERENT_STOKES_TIME_INTEGRATION_FACTOR][NR_CHANNELS]; + typedef __global float4 (*ComplexVoltagesType)[NR_CHANNELS][NR_SAMPLES_PER_CHANNEL / COHERENT_STOKES_TIME_INTEGRATION_FACTOR][COHERENT_STOKES_TIME_INTEGRATION_FACTOR][NR_TABS]; + + StokesType stokesData = (StokesType) stokesDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float tmp[NR_COHERENT_STOKES][16][17]; + + uint tabBase = 16 * get_global_id(1); + uint chBase = 16 * get_global_id(2); + + uint tabOffsetR = get_local_id(0) & 15; + uint tabR = tabBase + tabOffsetR; + uint chOffsetR = get_local_id(0) >> 4; + uint channelR = chBase + chOffsetR; + bool doR = NR_TABS % 16 == 0 || tabR < NR_TABS; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint chOffsetW = get_local_id(0) & 15; + uint channelW = chBase + chOffsetW; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + + for (uint time = 0; time < NR_SAMPLES_PER_CHANNEL / COHERENT_STOKES_TIME_INTEGRATION_FACTOR; time ++) { + float stokesI = 0; +#if NR_COHERENT_STOKES == 4 + float stokesQ = 0, halfStokesU = 0, halfStokesV = 0; +#endif + + if 
(doR) { + for (uint t = 0; t < COHERENT_STOKES_TIME_INTEGRATION_FACTOR; t ++) { + float4 sample = (*complexVoltages)[channelR][time][t][tabR]; + float2 X = sample.xy, Y = sample.zw; + float powerX = X.x * X.x + X.y * X.y; + float powerY = Y.x * Y.x + Y.y * Y.y; + stokesI += powerX + powerY; +#if NR_COHERENT_STOKES == 4 + stokesQ += powerX - powerY; + halfStokesU += X.x * Y.x + X.y * Y.y; + halfStokesV += X.y * Y.x - X.x * Y.y; +#endif + } + + tmp[0][tabOffsetR][chOffsetR] = stokesI; +#if NR_COHERENT_STOKES == 4 + tmp[1][tabOffsetR][chOffsetR] = stokesQ; + tmp[2][tabOffsetR][chOffsetR] = 2 * halfStokesU; + tmp[3][tabOffsetR][chOffsetR] = 2 * halfStokesV; +#endif + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) + for (uint stokes = 0; stokes < NR_COHERENT_STOKES; stokes ++) + (*stokesData)[tabW][stokes][time][channelW] = tmp[stokes][tabOffsetW][chOffsetW]; + + barrier(CLK_LOCAL_MEM_FENCE); + } +} + + +#if 0 +__kernel void computeStokes(__global void *restrict stokesDataPtr, + __global const void *restrict dedispersedDataPtr) +{ + typedef __global float (*StokesType)[NR_TABS][NR_COHERENT_STOKES][NR_SAMPLES_PER_CHANNEL / COHERENT_STOKES_TIME_INTEGRATION_FACTOR][NR_CHANNELS]; + typedef __global float2 (*DedispersedDataType)[NR_TABS][NR_POLARIZATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL]; + + StokesType stokesData = (StokesType) stokesDataPtr; + DedispersedDataType dedispersedData = (DedispersedDataType) dedispersedDataPtr; + + __local float tmp[NR_COHERENT_STOKES][16][17]; + + uint timeBase = 16 * get_global_id(1); + uint chBase = 16 * get_global_id(2); + + uint timeOffsetR = get_local_id(0) & 15; + uint timeR = timeBase + tabOffsetR; + uint chOffsetR = get_local_id(0) >> 4; + uint channelR = chBase + chOffsetR; + bool doR = NR_TABS % 16 == 0 || tabR < NR_TABS; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint chOffsetW = get_local_id(0) & 15; + uint channelW = chBase + chOffsetW; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + 
+ for (uint time = 0; time < NR_SAMPLES_PER_CHANNEL / COHERENT_STOKES_TIME_INTEGRATION_FACTOR; time ++) { + float stokesI = 0; +#if NR_COHERENT_STOKES == 4 + float stokesQ = 0, halfStokesU = 0, halfStokesV = 0; +#endif + + if (doR) { + for (uint t = 0; t < COHERENT_STOKES_TIME_INTEGRATION_FACTOR; t ++) { + float4 sample = (*complexVoltages)[channelR][time][t][tabR]; + float2 X = sample.xy, Y = sample.zw; + float powerX = X.x * X.x + X.y * X.y; + float powerY = Y.x * Y.x + Y.y * Y.y; + stokesI += powerX + powerY; +#if NR_COHERENT_STOKES == 4 + stokesQ += powerX - powerY; + halfStokesU += X.x * Y.x + X.y * Y.y; + halfStokesV += X.y * Y.x - X.x * Y.y; +#endif + } + + tmp[0][tabOffsetR][chOffsetR] = stokesI; +#if NR_COHERENT_STOKES == 4 + tmp[1][tabOffsetR][chOffsetR] = stokesQ; + tmp[2][tabOffsetR][chOffsetR] = 2 * halfStokesU; + tmp[3][tabOffsetR][chOffsetR] = 2 * halfStokesV; +#endif + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) + for (uint stokes = 0; stokes < NR_COHERENT_STOKES; stokes ++) + (*stokesData)[tabW][stokes][time][channelW] = tmp[stokes][tabOffsetW][chOffsetW]; + + barrier(CLK_LOCAL_MEM_FENCE); + } +} +#endif diff --git a/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl-0.ptx b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..95682a21f13f73a46e51290282c947066bcf9e94 Binary files /dev/null and b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl-0.ptx differ diff --git a/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl.ok b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl.ok new file mode 100644 index 0000000000000000000000000000000000000000..be45251af7d4067667abd7f9af7c7f325a182e06 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/CoherentStokes.cl.ok @@ -0,0 +1,63 @@ +__kernel void computeStokes(__global void *restrict stokesDataPtr, + __global const void *restrict complexVoltagesPtr) +{ + typedef __global float (*StokesType)[NR_TABS][NR_STOKES][NR_TIMES_PER_BLOCK / 
STOKES_INTEGRATION_SAMPLES][NR_CHANNELS]; + typedef __global float4 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK / STOKES_INTEGRATION_SAMPLES][STOKES_INTEGRATION_SAMPLES][NR_TABS]; + + StokesType stokesData = (StokesType) stokesDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float tmp[NR_STOKES][16][17]; + + uint tabBase = 16 * get_global_id(1); + uint chBase = 16 * get_global_id(2); + + uint tabOffsetR = get_local_id(0) & 15; + uint tabR = tabBase + tabOffsetR; + uint chOffsetR = get_local_id(0) >> 4; + uint channelR = chBase + chOffsetR; + bool doR = NR_TABS % 16 == 0 || tabR < NR_TABS; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint chOffsetW = get_local_id(0) & 15; + uint channelW = chBase + chOffsetW; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + + for (uint time = 0; time < NR_TIMES_PER_BLOCK / STOKES_INTEGRATION_SAMPLES; time ++) { + float stokesI = 0; +#if NR_STOKES == 4 + float stokesQ = 0, halfStokesU = 0, halfStokesV = 0; +#endif + + if (doR) { + for (uint t = 0; t < STOKES_INTEGRATION_SAMPLES; t ++) { + float4 sample = (*complexVoltages)[channelR][time][t][tabR]; + float2 X = sample.xy, Y = sample.zw; + float powerX = X.x * X.x + X.y * X.y; + float powerY = Y.x * Y.x + Y.y * Y.y; + stokesI += powerX + powerY; +#if NR_STOKES == 4 + stokesQ += powerX - powerY; + halfStokesU += X.x * Y.x + X.y * Y.y; + halfStokesV += X.y * Y.x - X.x * Y.y; +#endif + } + + tmp[0][tabOffsetR][chOffsetR] = stokesI; +#if NR_STOKES == 4 + tmp[1][tabOffsetR][chOffsetR] = stokesQ; + tmp[2][tabOffsetR][chOffsetR] = 2 * halfStokesU; + tmp[3][tabOffsetR][chOffsetR] = 2 * halfStokesV; +#endif + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) + for (uint stokes = 0; stokes < NR_STOKES; stokes ++) + (*stokesData)[tabW][stokes][time][channelW] = tmp[stokes][tabOffsetW][chOffsetW]; + + barrier(CLK_LOCAL_MEM_FENCE); + } +} diff --git a/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl 
b/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl new file mode 100644 index 0000000000000000000000000000000000000000..bffddc4e9d0a052dd4570269e89700b2faac9046 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl @@ -0,0 +1,61 @@ +#include "math.cl" + + +// | DM beam pol ch subtime subch. | SB +// -------+-------------------------------+--- +// taper | x | x +// factor | x x x | x +// sample | x x x x x | x + +#define CHANNEL_BANDWIDTH (SUBBAND_BANDWIDTH / NR_CHANNELS) +#define SUB_CHANNEL_BANDWIDTH (CHANNEL_BANDWIDTH / DEDISPERSION_FFT_SIZE) + +typedef __global float2 (*BufferType)[NR_TABS][NR_POLARIZATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL / DEDISPERSION_FFT_SIZE][DEDISPERSION_FFT_SIZE]; + + +__kernel void applyChirp(__global void *bufferPtr, + __global float *DMs, + float subbandFrequency) +{ + __local float local_DMs[NR_TABS]; + + for (int i = get_local_id(0); i < NR_TABS; i += get_local_size(0)) + local_DMs[i] = DMs[i] * 2.0f * (float) M_PI * 4.149e15f; + + barrier(CLK_LOCAL_MEM_FENCE); + + BufferType buffer = (BufferType) bufferPtr; + + uint subChannel = get_global_id(0); + uint time = get_global_id(1); + uint channel = get_global_id(2); + +#if NR_CHANNELS > 1 + float subbandBaseFrequency = subbandFrequency - .5f * (float) SUBBAND_BANDWIDTH; + float channel0frequency = subbandBaseFrequency + channel * CHANNEL_BANDWIDTH; +#else + float channel0frequency = subbandFrequency; +#endif + + float binFrequency = subChannel * SUB_CHANNEL_BANDWIDTH; + + if (subChannel > DEDISPERSION_FFT_SIZE / 2) // upper half of the FFT bins holds negative frequency offsets + binFrequency -= CHANNEL_BANDWIDTH; + + float taper = native_rsqrt(1 + pow(binFrequency / (.47f * (float) CHANNEL_BANDWIDTH), 80.0f)) * DEDISPERSION_FFT_SIZE; + float frequencyDiv = binFrequency / channel0frequency; + float frequencyFac = frequencyDiv * frequencyDiv / (channel0frequency + binFrequency); + + for (uint tab = 0; tab < NR_TABS; tab ++) { + float DM = local_DMs[tab]; + + /* if (DM > 0) */ { + float2 sampleX = (*buffer)[tab][0][channel][time][subChannel]; + 
float2 sampleY = (*buffer)[tab][1][channel][time][subChannel]; + float2 factor = cexp(DM * frequencyFac) * taper; + + (*buffer)[tab][0][channel][time][subChannel] = cmul(factor, sampleX); + (*buffer)[tab][1][channel][time][subChannel] = cmul(factor, sampleY); + } + } +} diff --git a/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl-0.ptx b/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..221a542fb5fdfc1737730694aaf0921d00774dfb --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/Dedispersion.cl-0.ptx @@ -0,0 +1,417 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Sat Sep 29 10:12:13 2012 (1348906333) +// Driver 304.54 +// + +.version 3.0 +.target sm_30, texmode_independent +.address_size 32 + +.extern .shared .align 4 .b8 shr_2_local_DMs[512]; + +.entry applyChirp( + .param .u32 .ptr .global .align 1 applyChirp_param_0, + .param .u32 .ptr .global .align 4 applyChirp_param_1, + .param .f32 applyChirp_param_2 +) +{ + .reg .f32 %f<225>; + .reg .pred %p<26>; + .reg .s32 %r<97>; + + + // inline asm + mov.u32 %r32, %tid.x; + // inline asm + setp.gt.s32 %p4, %r32, 127; + mov.u32 %r92, %r32; + @%p4 bra BB0_2; + +BB0_1: + shl.b32 %r34, %r92, 2; + ld.param.u32 %r91, [applyChirp_param_1]; + add.s32 %r35, %r91, %r34; + ld.global.f32 %f29, [%r35]; + add.ftz.f32 %f30, %f29, %f29; + mul.ftz.f32 %f31, %f30, 0f40490FDB; + mul.ftz.f32 %f32, %f31, 0f596BD7E5; + mov.u32 %r36, shr_2_local_DMs; + add.s32 %r37, %r36, %r34; + st.shared.f32 [%r37], %f32; + // inline asm + mov.u32 %r33, %ntid.x; + // inline asm + add.s32 %r92, %r33, %r92; + setp.lt.s32 %p5, %r92, 128; + @%p5 bra BB0_1; + +BB0_2: + bar.sync 0; + // inline asm + mov.u32 %r38, %envreg3; + // inline asm + // inline asm + mov.u32 %r39, %ntid.x; + // inline asm + // inline asm + mov.u32 %r40, %ctaid.x; + // inline asm + // inline asm + mov.u32 %r41, %tid.x; + // inline asm + add.s32 %r50, %r41, %r38; + mad.lo.s32 %r51, %r40, %r39, %r50; + // inline asm 
+ mov.u32 %r42, %envreg4; + // inline asm + // inline asm + mov.u32 %r43, %ntid.y; + // inline asm + // inline asm + mov.u32 %r44, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r45, %tid.y; + // inline asm + // inline asm + mov.u32 %r46, %envreg5; + // inline asm + // inline asm + mov.u32 %r47, %ntid.z; + // inline asm + // inline asm + mov.u32 %r48, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r49, %tid.z; + // inline asm + add.s32 %r52, %r49, %r46; + mad.lo.s32 %r53, %r48, %r47, %r52; + cvt.rn.f32.u32 %f35, %r53; + ld.param.f32 %f217, [applyChirp_param_2]; + add.ftz.f32 %f36, %f217, 0fC7BEBC20; + fma.rn.ftz.f32 %f2, %f35, 0f42BEBC20, %f36; + cvt.rn.f32.u32 %f37, %r51; + mul.ftz.f32 %f38, %f37, 0f403EBC20; + setp.gt.u32 %p6, %r51, 32; + fma.rn.ftz.f32 %f39, %f37, 0f403EBC20, 0fC2BEBC20; + selp.f32 %f3, %f39, %f38, %p6; + div.rn.ftz.f32 %f34, %f3, 0f42334A70; + // inline asm + abs.f32 %f33, %f34; + // inline asm + setp.eq.ftz.f32 %p7, %f34, 0f3F800000; + @%p7 bra BB0_25; + + setp.nan.ftz.f32 %p8, %f34, %f34; + @%p8 bra BB0_24; + + mov.f32 %f6, 0fFF800000; + mov.f32 %f220, 0f42A00000; + mov.f32 %f44, 0f3F000000; + mul.rn.f32 %f41, %f44, %f220; + // inline asm + cvt.rmi.f32.f32 %f40, %f41; + // inline asm + mov.f32 %f45, 0f40000000; + mul.rn.f32 %f46, %f45, %f40; + sub.ftz.f32 %f47, %f220, %f46; + setp.eq.ftz.f32 %p1, %f47, 0f3F800000; + // inline asm + cvt.rzi.f32.f32 %f42, %f220; + // inline asm + setp.eq.ftz.f32 %p2, %f42, 0f42A00000; + and.pred %p3, %p1, %p2; + setp.eq.ftz.f32 %p9, %f33, 0f00000000; + @%p9 bra BB0_23; + + setp.eq.ftz.f32 %p10, %f34, 0f7F800000; + setp.eq.ftz.f32 %p11, %f34, %f6; + or.pred %p12, %p10, %p11; + @%p12 bra BB0_20; + + setp.geu.ftz.f32 %p13, %f34, 0f00000000; + @%p13 bra BB0_8; + + mov.f32 %f49, 0f42A00000; + // inline asm + cvt.rzi.f32.f32 %f48, %f49; + // inline asm + setp.neu.ftz.f32 %p14, %f48, 0f42A00000; + @%p14 bra BB0_19; + +BB0_8: + // inline asm + abs.f32 %f50, %f34; + // inline asm + mov.b32 %r18, %f50; + 
shr.u32 %r54, %r18, 23; + and.b32 %r55, %r54, 255; + add.s32 %r93, %r55, -127; + setp.eq.s32 %p15, %r55, 0; + mov.f32 %f218, %f50; + @%p15 bra BB0_9; + bra.uni BB0_10; + +BB0_9: + and.b32 %r56, %r18, -2139095041; + or.b32 %r57, %r56, 1065353216; + mov.b32 %f52, %r57; + add.ftz.f32 %f53, %f52, 0fBF800000; + mov.b32 %r58, %f53; + shr.u32 %r59, %r58, 23; + and.b32 %r60, %r59, 255; + add.s32 %r93, %r60, -253; + and.b32 %r61, %r58, -2139095041; + or.b32 %r62, %r61, 1065353216; + mov.b32 %f218, %r62; + +BB0_10: + mov.b32 %r63, %f218; + and.b32 %r64, %r63, -2139095041; + or.b32 %r65, %r64, 1065353216; + mov.b32 %f219, %r65; + setp.gt.ftz.f32 %p16, %f219, 0f3FB504F3; + @%p16 bra BB0_11; + bra.uni BB0_12; + +BB0_11: + mul.rn.f32 %f219, %f219, %f44; + add.s32 %r93, %r93, 1; + +BB0_12: + add.ftz.f32 %f63, %f219, 0f3F800000; + rcp.approx.ftz.f32 %f57, %f63; + add.ftz.f32 %f56, %f219, 0fBF800000; + // inline asm + mul.rz.f32 %f55, %f56, %f57; + // inline asm + mul.rn.f32 %f65, %f45, %f55; + mul.rn.f32 %f66, %f65, %f65; + mov.f32 %f67, 0f3B18F0FE; + mul.rn.f32 %f68, %f67, %f66; + add.ftz.f32 %f69, %f68, 0f3C4CAF63; + mul.rn.f32 %f70, %f69, %f66; + add.ftz.f32 %f71, %f70, 0f3DAAAABD; + mul.rn.f32 %f72, %f71, %f66; + mul.rn.f32 %f60, %f72, %f65; + mov.b32 %r66, %f65; + and.b32 %r67, %r66, -4096; + mov.b32 %f73, %r67; + mov.b32 %r68, %f56; + and.b32 %r69, %r68, -4096; + mov.b32 %f74, %r69; + sub.ftz.f32 %f75, %f56, %f73; + mul.rn.f32 %f76, %f45, %f75; + sub.ftz.f32 %f77, %f56, %f74; + mul.rn.f32 %f78, %f73, %f74; + sub.ftz.f32 %f79, %f76, %f78; + mul.rn.f32 %f80, %f73, %f77; + sub.ftz.f32 %f81, %f79, %f80; + mul.rn.f32 %f82, %f57, %f81; + add.ftz.f32 %f83, %f73, %f82; + sub.ftz.f32 %f84, %f83, %f73; + sub.ftz.f32 %f85, %f82, %f84; + add.ftz.f32 %f86, %f83, %f60; + neg.ftz.f32 %f59, %f60; + // inline asm + add.rz.f32 %f58, %f59, %f60; + // inline asm + add.ftz.f32 %f87, %f58, %f85; + add.ftz.f32 %f88, %f86, %f87; + sub.ftz.f32 %f89, %f87, %f87; + cvt.rn.f32.s32 %f90, %r93; + mov.f32 
%f91, 0f3F317200; + mul.rn.f32 %f92, %f90, %f91; + mov.f32 %f93, 0f35BFBE8E; + mul.rn.f32 %f94, %f90, %f93; + add.ftz.f32 %f95, %f92, %f88; + sub.ftz.f32 %f96, %f88, %f88; + add.ftz.f32 %f97, %f96, %f89; + add.ftz.f32 %f98, %f97, %f94; + add.ftz.f32 %f13, %f95, %f98; + sub.ftz.f32 %f14, %f98, %f98; + // inline asm + abs.f32 %f61, %f220; + // inline asm + setp.gt.ftz.f32 %p17, %f61, 0f77F684DF; + @%p17 bra BB0_13; + bra.uni BB0_14; + +BB0_13: + mov.f32 %f100, 0f39000000; + mov.f32 %f101, 0f42A00000; + mul.rn.f32 %f220, %f101, %f100; + +BB0_14: + mov.f32 %f102, 0f45800800; + mul.rn.f32 %f103, %f13, %f102; + sub.ftz.f32 %f104, %f13, %f103; + add.ftz.f32 %f105, %f104, %f103; + sub.ftz.f32 %f106, %f13, %f105; + mul.rn.f32 %f107, %f220, %f102; + sub.ftz.f32 %f108, %f220, %f107; + add.ftz.f32 %f109, %f108, %f107; + sub.ftz.f32 %f110, %f220, %f109; + mul.rn.f32 %f111, %f105, %f109; + mul.rn.f32 %f112, %f13, %f220; + sub.ftz.f32 %f113, %f111, %f112; + mul.rn.f32 %f114, %f105, %f110; + add.ftz.f32 %f115, %f113, %f114; + mul.rn.f32 %f116, %f106, %f109; + add.ftz.f32 %f117, %f115, %f116; + mul.rn.f32 %f118, %f106, %f110; + add.ftz.f32 %f119, %f117, %f118; + mul.rn.f32 %f120, %f14, %f220; + add.ftz.f32 %f121, %f120, %f119; + add.ftz.f32 %f122, %f112, %f121; + sub.ftz.f32 %f17, %f121, %f121; + mov.f32 %f223, %f17; + mov.f32 %f224, %f122; + mov.b32 %r24, %f122; + setp.eq.s32 %p18, %r24, 1118925336; + @%p18 bra BB0_15; + bra.uni BB0_16; + +BB0_15: + add.s32 %r70, %r24, -1; + mov.b32 %f123, %r70; + add.ftz.f32 %f124, %f17, 0f37000000; + mov.f32 %f223, %f124; + mov.f32 %f224, %f123; + +BB0_16: + // inline asm + mul.f32 %f125, %f224, 0f3FB8AA3B;ex2.approx.f32 %f125, %f125; + // inline asm + setp.neu.ftz.f32 %p19, %f125, 0f7F800000; + mov.f32 %f221, %f125; + @%p19 bra BB0_17; + bra.uni BB0_18; + +BB0_17: + // inline asm + mad.f32 %f127, %f125, %f223, %f125; + // inline asm + mov.f32 %f221, %f127; + +BB0_18: + not.pred %p21, %p3; + or.pred %p23, %p13, %p21; + mov.b32 %r71, %f221; + 
xor.b32 %r72, %r71, -2147483648; + mov.b32 %f131, %r72; + selp.f32 %f222, %f221, %f131, %p23; + bra.uni BB0_26; + +BB0_19: + mov.f32 %f222, 0f7FFFFFFF; + bra.uni BB0_26; + +BB0_20: + mov.b32 %r73, %f34; + setp.lt.s32 %p24, %r73, 0; + @%p24 bra BB0_22; + + mov.f32 %f222, 0f7F800000; + bra.uni BB0_26; + +BB0_22: + selp.f32 %f222, 0fFF800000, 0f7F800000, %p3; + bra.uni BB0_26; + +BB0_23: + mov.b32 %r74, %f34; + and.b32 %r75, %r74, -2147483648; + mov.b32 %f132, %r75; + selp.f32 %f222, %f132, 0f00000000, %p3; + bra.uni BB0_26; + +BB0_24: + add.ftz.f32 %f222, %f34, 0f42A00000; + bra.uni BB0_26; + +BB0_25: + mov.f32 %f222, 0f3F800000; + +BB0_26: + add.ftz.f32 %f135, %f222, 0f3F800000; + // inline asm + rsqrt.approx.f32 %f134, %f135; + // inline asm + mul.ftz.f32 %f136, %f134, 0f42000000; + div.approx.ftz.f32 %f137, %f3, %f2; + mul.ftz.f32 %f138, %f137, %f137; + add.ftz.f32 %f139, %f2, %f3; + div.approx.ftz.f32 %f28, %f138, %f139; + add.s32 %r78, %r45, %r42; + mad.lo.s32 %r79, %r44, %r43, %r78; + shl.b32 %r80, %r79, 8; + shl.b32 %r83, %r53, 8; + add.s32 %r84, %r80, %r83; + shl.b32 %r87, %r51, 3; + add.s32 %r88, %r84, %r87; + ld.param.u32 %r90, [applyChirp_param_0]; + add.s32 %r89, %r88, %r90; + add.s32 %r95, %r89, 1048576; + mov.u32 %r96, 128; + mov.u32 %r94, shr_2_local_DMs; + +BB0_27: + ld.global.v2.f32 {%f181, %f182}, [%r95+-1048576]; + ld.global.v2.f32 {%f183, %f184}, [%r95+-524288]; + ld.shared.f32 %f149, [%r94]; + mul.ftz.f32 %f144, %f149, %f28; + // inline asm + cos.approx.f32 %f141, %f144; + // inline asm + // inline asm + sin.approx.f32 %f143, %f144; + // inline asm + mul.ftz.f32 %f187, %f141, %f136; + mul.ftz.f32 %f188, %f143, %f136; + mul.ftz.f32 %f152, %f187, %f181; + neg.f32 %f155, %f188; + fma.rn.ftz.f32 %f156, %f155, %f182, %f152; + mul.ftz.f32 %f157, %f188, %f181; + fma.rn.ftz.f32 %f158, %f187, %f182, %f157; + st.global.v2.f32 [%r95+-1048576], {%f156, %f158}; + mul.ftz.f32 %f160, %f187, %f183; + fma.rn.ftz.f32 %f162, %f155, %f184, %f160; + mul.ftz.f32 
%f163, %f188, %f183; + fma.rn.ftz.f32 %f164, %f187, %f184, %f163; + st.global.v2.f32 [%r95+-524288], {%f162, %f164}; + ld.global.v2.f32 {%f195, %f196}, [%r95]; + ld.global.v2.f32 {%f197, %f198}, [%r95+524288]; + ld.shared.f32 %f165, [%r94+4]; + mul.ftz.f32 %f148, %f165, %f28; + // inline asm + cos.approx.f32 %f145, %f148; + // inline asm + // inline asm + sin.approx.f32 %f147, %f148; + // inline asm + mul.ftz.f32 %f201, %f145, %f136; + mul.ftz.f32 %f202, %f147, %f136; + mul.ftz.f32 %f168, %f201, %f195; + neg.f32 %f171, %f202; + fma.rn.ftz.f32 %f172, %f171, %f196, %f168; + mul.ftz.f32 %f173, %f202, %f195; + fma.rn.ftz.f32 %f174, %f201, %f196, %f173; + st.global.v2.f32 [%r95], {%f172, %f174}; + mul.ftz.f32 %f176, %f201, %f197; + fma.rn.ftz.f32 %f178, %f171, %f198, %f176; + mul.ftz.f32 %f179, %f202, %f197; + fma.rn.ftz.f32 %f180, %f201, %f198, %f179; + st.global.v2.f32 [%r95+524288], {%f178, %f180}; + add.s32 %r95, %r95, 2097152; + add.s32 %r94, %r94, 8; + add.s32 %r96, %r96, -2; + setp.ne.s32 %p25, %r96, 0; + @%p25 bra BB0_27; + + ret; +} + + diff --git a/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl b/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl new file mode 100644 index 0000000000000000000000000000000000000000..0f3a14bb126d5326b394eede815a82ef4febba47 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl @@ -0,0 +1,45 @@ +typedef __global float (*IncoherentStokesType)[NR_INCOHERENT_STOKES][NR_SAMPLES_PER_CHANNEL / INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR][NR_CHANNELS]; +typedef __global float4 (*InputType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL / INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR][INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR]; + + +__kernel void incoherentStokes(__global void *restrict stokesPtr, + __global const void *restrict inputPtr) +{ + IncoherentStokesType stokes = (IncoherentStokesType) stokesPtr; + InputType input = (InputType) inputPtr; + + uint time = get_global_id(0); + uint channel = get_global_id(1); + + if (time >= 
NR_SAMPLES_PER_CHANNEL / INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR) + return; + + float stokesI = 0; +#if NR_INCOHERENT_STOKES == 4 + float stokesQ = 0, halfStokesU = 0, halfStokesV = 0; +#endif + + for (uint station = 0; station < NR_STATIONS; station ++) { + for (uint t = 0; t < INCOHERENT_STOKES_TIME_INTEGRATION_FACTOR; t ++) { + float4 sample = (*input)[station][channel][time][t]; + float2 X = sample.xy; + float2 Y = sample.zw; + float powerX = X.x * X.x + X.y * X.y; + float powerY = Y.x * Y.x + Y.y * Y.y; + + stokesI += powerX + powerY; +#if NR_INCOHERENT_STOKES == 4 + stokesQ += powerX - powerY; + halfStokesU += X.x * Y.x + X.y * Y.y; + halfStokesV += X.y * Y.x - X.x * Y.y; +#endif + } + } + + (*stokes)[0][time][channel] = stokesI; +#if NR_INCOHERENT_STOKES == 4 + (*stokes)[1][time][channel] = stokesQ; + (*stokes)[2][time][channel] = 2 * halfStokesU; + (*stokes)[3][time][channel] = 2 * halfStokesV; +#endif +} diff --git a/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl-0.ptx b/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..be0613c10bbe025940d0054bfd9e8579149984e3 Binary files /dev/null and b/RTCP/GPUProc/src/BeamFormer/IncoherentStokes.cl-0.ptx differ diff --git a/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl b/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl new file mode 100644 index 0000000000000000000000000000000000000000..fcb5e035873fb78a05534c4deacdb444040601c9 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl @@ -0,0 +1,25 @@ +#if NR_BITS_PER_SAMPLE == 16 +typedef short2 SampleType; +#elif NR_BITS_PER_SAMPLE == 8 +typedef char2 SampleType; +#else +#error unsupported NR_BITS_PER_SAMPLE +#endif + +typedef __global SampleType (*SampledDataType)[NR_STATIONS][NR_SAMPLES_PER_SUBBAND][NR_POLARIZATIONS]; +typedef __global float2 (*ConvertedDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND]; + + +__kernel void intToFloat(__global void *restrict convertedDataPtr, + __global 
const void *restrict sampledDataPtr) +{ + ConvertedDataType convertedData = (ConvertedDataType) convertedDataPtr; + SampledDataType sampledData = (SampledDataType) sampledDataPtr; + + uint station = get_global_id(1); + + for (uint time = get_local_id(0); time < NR_SAMPLES_PER_SUBBAND; time += get_local_size(0)) { + (*convertedData)[station][0][time] = convert_float2((*sampledData)[station][time][0]); + (*convertedData)[station][1][time] = convert_float2((*sampledData)[station][time][1]); + } +} diff --git a/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl-0.ptx b/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..ce5c60b9ac9b08ef14ba9402e30163b4bede74bb --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/IntToFloat.cl-0.ptx @@ -0,0 +1,82 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Sat Sep 29 10:12:13 2012 (1348906333) +// Driver 304.54 +// + +.version 3.0 +.target sm_30, texmode_independent +.address_size 32 + + +.entry intToFloat( + .param .u32 .ptr .global .align 1 intToFloat_param_0, + .param .u32 .ptr .global .align 1 intToFloat_param_1 +) +{ + .reg .f32 %f<9>; + .reg .pred %p<3>; + .reg .s32 %r<25>; + .reg .s16 %rc<9>; + + + // inline asm + mov.u32 %r7, %envreg4; + // inline asm + // inline asm + mov.u32 %r8, %ntid.y; + // inline asm + // inline asm + mov.u32 %r9, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r10, %tid.y; + // inline asm + add.s32 %r12, %r10, %r7; + mad.lo.s32 %r3, %r9, %r8, %r12; + // inline asm + mov.u32 %r11, %tid.x; + // inline asm + setp.gt.u32 %p1, %r11, 65535; + mov.u32 %r24, %r11; + @%p1 bra BB0_2; + +BB0_1: + shl.b32 %r14, %r3, 18; + ld.param.u32 %r23, [intToFloat_param_1]; + add.s32 %r15, %r23, %r14; + shl.b32 %r16, %r24, 2; + add.s32 %r17, %r15, %r16; + ld.global.v2.u8 {%rc5, %rc6}, [%r17]; + // inline asm + cvt.rn.f32.s8 %f1, %rc5; + // inline asm + // inline asm + cvt.rn.f32.s8 %f2, %rc6; + // inline asm + shl.b32 %r18, %r3, 20; + ld.param.u32 
%r22, [intToFloat_param_0]; + add.s32 %r19, %r22, %r18; + shl.b32 %r20, %r24, 3; + add.s32 %r21, %r19, %r20; + st.global.v2.f32 [%r21], {%f1, %f2}; + ld.global.v2.u8 {%rc7, %rc8}, [%r17+2]; + // inline asm + cvt.rn.f32.s8 %f3, %rc7; + // inline asm + // inline asm + cvt.rn.f32.s8 %f4, %rc8; + // inline asm + st.global.v2.f32 [%r21+524288], {%f3, %f4}; + // inline asm + mov.u32 %r13, %ntid.x; + // inline asm + add.s32 %r24, %r13, %r24; + setp.lt.u32 %p2, %r24, 65536; + @%p2 bra BB0_1; + +BB0_2: + ret; +} + + diff --git a/RTCP/GPUProc/src/BeamFormer/Transpose.cl b/RTCP/GPUProc/src/BeamFormer/Transpose.cl new file mode 100644 index 0000000000000000000000000000000000000000..bc1af15319662633a138f1e28957be2e89829b68 --- /dev/null +++ b/RTCP/GPUProc/src/BeamFormer/Transpose.cl @@ -0,0 +1,88 @@ +#if 0 +typedef __global float2 (*TransposedDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS]; +typedef __global float4 (*ComplexVoltagesType)[NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_TABS]; + +__kernel void transposeComplexVoltages(__global void *restrict transposedDataPtr, + __global const void *restrict complexVoltagesPtr) +{ + TransposedDataType transposedData = (TransposedDataType) transposedDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float4 tmp[16][17]; + + uint tabBase = 16 * get_global_id(1); + uint chBase = 16 * get_global_id(2); + + uint tabOffsetR = get_local_id(0) & 15; + uint tabR = tabBase + tabOffsetR; + uint chOffsetR = get_local_id(0) >> 4; + uint channelR = chBase + chOffsetR; + bool doR = NR_TABS % 16 == 0 || tabR < NR_TABS; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint chOffsetW = get_local_id(0) & 15; + uint channelW = chBase + chOffsetW; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + + for (int time = 0; time < NR_SAMPLES_PER_CHANNEL; time ++) { + if (doR) + tmp[tabOffsetR][chOffsetR] = (*complexVoltages)[channelR][time][tabR]; + + 
barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) { + float4 sample = tmp[tabOffsetW][chOffsetW]; + (*transposedData)[tabW][0][time][channelW] = sample.xy; + (*transposedData)[tabW][1][time][channelW] = sample.zw; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} + +#else + +typedef __global float2 (*TransposedDataType)[NR_TABS][NR_POLARIZATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL]; +typedef __global float4 (*ComplexVoltagesType)[NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_TABS]; + +__kernel void transposeComplexVoltages(__global void *restrict transposedDataPtr, + __global const void *restrict complexVoltagesPtr) +{ + TransposedDataType transposedData = (TransposedDataType) transposedDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float4 tmp[16][17]; + + uint tabBase = 16 * get_global_id(1); + uint timeBase = 16 * get_global_id(2); + + uint tabOffsetR = get_local_id(0) & 15; + uint tabR = tabBase + tabOffsetR; + uint timeOffsetR = get_local_id(0) >> 4; + uint timeR = timeBase + timeOffsetR; + bool doR = NR_TABS % 16 == 0 || tabR < NR_TABS; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint timeOffsetW = get_local_id(0) & 15; + uint timeW = timeBase + timeOffsetW; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + + for (int channel = 0; channel < NR_CHANNELS; channel ++) { + if (doR) + tmp[tabOffsetR][timeOffsetR] = (*complexVoltages)[timeR][channel][tabR]; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) { + float4 sample = tmp[tabOffsetW][timeOffsetW]; + (*transposedData)[tabW][0][channel][timeW] = sample.xy; + (*transposedData)[tabW][1][channel][timeW] = sample.zw; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} + +#endif diff --git a/RTCP/GPUProc/src/BeamFormer/Transpose.cl-0.ptx b/RTCP/GPUProc/src/BeamFormer/Transpose.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..26a5c9cca9b954543c29abfbb6bb29d45835a803 --- /dev/null +++ 
b/RTCP/GPUProc/src/BeamFormer/Transpose.cl-0.ptx @@ -0,0 +1,134 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Sat Sep 29 10:12:13 2012 (1348906333) +// Driver 304.54 +// + +.version 3.0 +.target sm_30, texmode_independent +.address_size 32 + +.extern .shared .align 16 .b8 shr_3_tmp[4352]; + +.entry transposeComplexVoltages( + .param .u32 .ptr .global .align 1 transposeComplexVoltages_param_0, + .param .u32 .ptr .global .align 1 transposeComplexVoltages_param_1 +) +{ + .reg .f32 %f<65>; + .reg .pred %p<2>; + .reg .s32 %r<58>; + + + ld.param.u32 %r26, [transposeComplexVoltages_param_0]; + ld.param.u32 %r27, [transposeComplexVoltages_param_1]; + // inline asm + mov.u32 %r13, %envreg4; + // inline asm + // inline asm + mov.u32 %r14, %ntid.y; + // inline asm + // inline asm + mov.u32 %r15, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r16, %tid.y; + // inline asm + // inline asm + mov.u32 %r17, %envreg5; + // inline asm + // inline asm + mov.u32 %r18, %ntid.z; + // inline asm + // inline asm + mov.u32 %r19, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r20, %tid.z; + // inline asm + // inline asm + mov.u32 %r21, %tid.x; + // inline asm + // inline asm + mov.u32 %r22, %tid.x; + // inline asm + shr.u32 %r28, %r22, 4; + // inline asm + mov.u32 %r23, %tid.x; + // inline asm + shr.u32 %r29, %r23, 4; + // inline asm + mov.u32 %r24, %tid.x; + // inline asm + and.b32 %r30, %r21, 15; + mov.u32 %r31, shr_3_tmp; + mad.lo.s32 %r32, %r30, 272, %r31; + and.b32 %r33, %r22, -16; + add.s32 %r1, %r32, %r33; + mad.lo.s32 %r34, %r29, 272, %r31; + and.b32 %r35, %r24, 15; + shl.b32 %r36, %r35, 4; + add.s32 %r2, %r34, %r36; + add.s32 %r37, %r20, %r17; + mad.lo.s32 %r38, %r19, %r18, %r37; + shl.b32 %r39, %r38, 4; + add.s32 %r40, %r28, %r39; + shl.b32 %r41, %r40, 16; + add.s32 %r42, %r16, %r13; + mad.lo.s32 %r43, %r15, %r14, %r42; + shl.b32 %r44, %r43, 4; + add.s32 %r45, %r44, %r30; + shl.b32 %r46, %r45, 4; + add.s32 %r47, %r41, %r46; + add.s32 %r48, %r47, 
%r27; + add.s32 %r56, %r48, 4096; + add.s32 %r49, %r29, %r44; + shl.b32 %r50, %r49, 20; + add.s32 %r51, %r39, %r35; + shl.b32 %r52, %r51, 3; + add.s32 %r53, %r50, %r52; + add.s32 %r54, %r53, %r26; + add.s32 %r55, %r54, 525056; + mov.u32 %r57, 2048; + +BB0_1: + add.s32 %r8, %r56, -4096; + ld.global.v4.f32 {%f61, %f62, %f63, %f64}, [%r56+-4096]; + st.shared.v4.f32 [%r1], {%f61, %f62, %f63, %f64}; + bar.sync 0; + ld.shared.v4.f32 {%f53, %f54, %f55, %f56}, [%r2]; + add.s32 %r9, %r55, -525056; + st.global.v2.f32 [%r55+-525056], {%f53, %f54}; + st.global.v2.f32 [%r55+-768], {%f55, %f56}; + bar.sync 0; + ld.global.v4.f32 {%f49, %f50, %f51, %f52}, [%r8+2048]; + st.shared.v4.f32 [%r1], {%f49, %f50, %f51, %f52}; + bar.sync 0; + ld.shared.v4.f32 {%f41, %f42, %f43, %f44}, [%r2]; + st.global.v2.f32 [%r9+256], {%f41, %f42}; + st.global.v2.f32 [%r9+524544], {%f43, %f44}; + bar.sync 0; + ld.global.v4.f32 {%f37, %f38, %f39, %f40}, [%r8+4096]; + st.shared.v4.f32 [%r1], {%f37, %f38, %f39, %f40}; + bar.sync 0; + ld.shared.v4.f32 {%f29, %f30, %f31, %f32}, [%r2]; + st.global.v2.f32 [%r9+512], {%f29, %f30}; + st.global.v2.f32 [%r9+524800], {%f31, %f32}; + bar.sync 0; + ld.global.v4.f32 {%f25, %f26, %f27, %f28}, [%r8+6144]; + st.shared.v4.f32 [%r1], {%f25, %f26, %f27, %f28}; + bar.sync 0; + ld.shared.v4.f32 {%f17, %f18, %f19, %f20}, [%r2]; + st.global.v2.f32 [%r9+768], {%f17, %f18}; + st.global.v2.f32 [%r9+525056], {%f19, %f20}; + bar.sync 0; + add.s32 %r56, %r56, 8192; + add.s32 %r55, %r55, 1024; + add.s32 %r57, %r57, -4; + setp.ne.s32 %p1, %r57, 0; + @%p1 bra BB0_1; + + ret; +} + + diff --git a/RTCP/GPUProc/src/BeamletBuffer.cc b/RTCP/GPUProc/src/BeamletBuffer.cc new file mode 100644 index 0000000000000000000000000000000000000000..2cba113a936c59501c8b5625efcf2a9a249f1a11 --- /dev/null +++ b/RTCP/GPUProc/src/BeamletBuffer.cc @@ -0,0 +1,417 @@ +//# BeamletBuffer.cc: one line description +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# 
P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: BeamletBuffer.cc 18226 2011-06-09 12:56:47Z romein $ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +#include <Interface/Align.h> +#include <Interface/Exceptions.h> +#include <BeamletBuffer.h> +//#include <ION_Allocator.h> +//#include <InputThreadAsm.h> +#include <RSP.h> + +#include <boost/lexical_cast.hpp> +#include <cstring> +#include <stdexcept> + +#include <boost/format.hpp> +using boost::format; + + +namespace LOFAR { +namespace RTCP { + +template<typename SAMPLE_TYPE> const unsigned BeamletBuffer<SAMPLE_TYPE>::itsNrTimesPerPacket; + + +// The buffer size is a multiple of the input packet size. 
By setting +// itsOffset to a proper value, we can assure that input packets never +// wrap around the circular buffer + +template<typename SAMPLE_TYPE> BeamletBuffer<SAMPLE_TYPE>::BeamletBuffer(const Parset *ps, string &stationName, unsigned rspBoard) +: + itsRSPboard(rspBoard), + itsNrSubbands(ps->nrSlotsInFrame()), + itsPacketSize(sizeof(struct RSP::Header) + itsNrTimesPerPacket * itsNrSubbands * NR_POLARIZATIONS * sizeof(SAMPLE_TYPE)), + itsSize(align(ps->inputBufferSize(), itsNrTimesPerPacket)), + itsHistorySize(ps->nrHistorySamples()), + itsIsRealTime(ps->realTime()), + itsSynchronizedReaderWriter(itsIsRealTime ? 0 : new SynchronizedReaderAndWriter(itsSize)), // FIXME: does not work for multiple observations + itsLockedRanges(itsSize), + itsSBBuffers(boost::extents[itsNrSubbands][itsSize][NR_POLARIZATIONS], 128 /*, hugeMemoryAllocator*/), + itsOffset(0), + itsPreviousTimeStamp(0), + itsPreviousI(0), + itsCurrentTimeStamp(0), + itsCurrentI(0), +#if defined HAVE_BGP && !defined USE_VALGRIND + itsStride(itsSBBuffers[0].num_elements() * sizeof(SAMPLE_TYPE)), +#else + itsStride(itsSBBuffers[0].num_elements()), +#endif + itsReadTimer("buffer read", true, true), + itsWriteTimer("buffer write", true, true) +{ + itsLogPrefix = str(format("[station %s board %u] ") % stationName % rspBoard); + + if (ps->getUint32("OLAP.nrTimesInFrame") != itsNrTimesPerPacket) + THROW(GPUProcException, "OLAP.nrTimesInFrame should be " << boost::lexical_cast<std::string>(itsNrTimesPerPacket)); + +#if 0 + if (ps->realTime()) + itsSynchronizedReaderWriter = new TimeSynchronizedReader(ps->maxNetworkDelay()); + else + itsSynchronizedReaderWriter = new SynchronizedReaderAndWriter(itsSize); +#endif + +#if defined USE_VALGRIND + memset(itsSBBuffers.origin(), 0, itsSBBuffers.num_elements() * sizeof(SAMPLE_TYPE)); +#endif + + LOG_DEBUG_STR(itsLogPrefix << "Circular buffer at " << itsSBBuffers.origin() << "; contains " << itsSize << " samples"); +} + + +#if defined HAVE_BGP && !defined USE_VALGRIND 
+ +template<> inline void BeamletBuffer<i4complex>::writePacket(i4complex *dst, const i4complex *src) +{ + _copy_pkt_to_bbuffer_32_bytes(dst, itsStride, src, itsNrSubbands); +} + +template<> inline void BeamletBuffer<i8complex>::writePacket(i8complex *dst, const i8complex *src) +{ + _copy_pkt_to_bbuffer_64_bytes(dst, itsStride, src, itsNrSubbands); +} + +template<> inline void BeamletBuffer<i16complex>::writePacket(i16complex *dst, const i16complex *src) +{ + _copy_pkt_to_bbuffer_128_bytes(dst, itsStride, src, itsNrSubbands); +} + +#endif + + +template<typename SAMPLE_TYPE> inline void BeamletBuffer<SAMPLE_TYPE>::writePacket(SAMPLE_TYPE *dst, const SAMPLE_TYPE *src) +{ + for (unsigned sb = 0; sb < itsNrSubbands; sb ++) { + for (unsigned i = 0; i < itsNrTimesPerPacket * NR_POLARIZATIONS; i ++) + dst[i] = *src ++; + + dst += itsStride; + } +} + + +template<typename SAMPLE_TYPE> inline void BeamletBuffer<SAMPLE_TYPE>::updateValidData(const TimeStamp &begin, const TimeStamp &end) +{ + ScopedLock sl(itsValidDataMutex); + + itsValidData.exclude(0, end - itsSize); // forget old ValidData + + // add new ValidData (except if range list will grow too long, to avoid long + // computations) + + const SparseSet<TimeStamp>::Ranges &ranges = itsValidData.getRanges(); + + if (ranges.size() < 64 || ranges.back().end == begin) + itsValidData.include(begin, end); +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::writeConsecutivePackets(unsigned count) +{ + unsigned nrTimes = count * itsNrTimesPerPacket; + TimeStamp begin = itsCurrentTimeStamp, end = begin + nrTimes; + unsigned startI = itsCurrentI, endI = startI + nrTimes; + + if (endI >= itsSize) + endI -= itsSize; + + SAMPLE_TYPE *dst = itsSBBuffers[0][startI].origin(); + + // in synchronous mode, do not overrun tail of reader + if (!itsIsRealTime) + itsSynchronizedReaderWriter->startWrite(begin, end); + + // do not write in circular buffer section that is being read + itsLockedRanges.lock(startI, endI); + + 
while (itsCurrentI != endI) { + writePacket(dst, reinterpret_cast<const SAMPLE_TYPE *>(itsCurrentPacketPtr)); + itsCurrentPacketPtr += itsPacketSize; + dst += itsNrTimesPerPacket * NR_POLARIZATIONS; + + if ((itsCurrentI += itsNrTimesPerPacket) == itsSize) { + itsCurrentI = 0; + dst = itsSBBuffers.origin(); + } + } + + itsCurrentTimeStamp = end; + updateValidData(begin, end); + + itsLockedRanges.unlock(startI, endI); + + if (!itsIsRealTime) + itsSynchronizedReaderWriter->finishedWrite(end); +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::resetCurrentTimeStamp(const TimeStamp &newTimeStamp) +{ + // A packet with unexpected timestamp was received. Handle accordingly. + bool firstPacket = !itsCurrentTimeStamp; // the first timestamp is always unexpected + + itsCurrentTimeStamp = newTimeStamp; + itsCurrentI = mapTime2Index(newTimeStamp); + + if (!aligned(itsCurrentI, itsNrTimesPerPacket)) { + // RSP board reset? Recompute itsOffset and clear the entire buffer. + + ScopedLock sl(itsReadMutex); // avoid reset while other thread reads + + int oldOffset = itsOffset; + itsOffset = - (newTimeStamp % itsNrTimesPerPacket); + itsCurrentI = mapTime2Index(newTimeStamp); + assert(aligned(itsCurrentI, itsNrTimesPerPacket)); + + { + ScopedLock sl(itsValidDataMutex); + itsValidData.reset(); + } + + if (!firstPacket) { + LOG_WARN_STR(itsLogPrefix << "Reset BeamletBuffer at " << newTimeStamp << "; itsOffset was " << oldOffset << " and becomes " << itsOffset); + } + } +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::writeMultiplePackets(const void *rspData, const std::vector<TimeStamp> &timeStamps) +{ + itsWriteTimer.start(); + itsCurrentPacketPtr = reinterpret_cast<const char *>(rspData) + sizeof(struct RSP::Header); + + for (unsigned first = 0, last; first < timeStamps.size();) { + if (timeStamps[first] != itsCurrentTimeStamp) + resetCurrentTimeStamp(timeStamps[first]); + + // find a series of consecutively timed packets + for (last = 
first + 1; last < timeStamps.size() && timeStamps[last] == timeStamps[last - 1] + itsNrTimesPerPacket; last ++) + ; + + writeConsecutivePackets(last - first); + first = last; + } + + itsWriteTimer.stop(); +} + + +// Writes one packet (itsNrTimesPerPacket samples of every subband) that starts at +// time stamp `begin` into the circular buffer and records [begin, end) as valid data. +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::writePacketData(const SAMPLE_TYPE *data, const TimeStamp &begin) +{ + itsWriteTimer.start(); + + TimeStamp end = begin + itsNrTimesPerPacket; + + // cache previous index, to avoid expensive mapTime2Index() + unsigned startI; + + if (begin == itsPreviousTimeStamp) { + startI = itsPreviousI; + } else { + startI = mapTime2Index(begin); + + if (!aligned(startI, itsNrTimesPerPacket)) { + // RSP board reset? Recompute itsOffset and clear the entire buffer. + // Derive the offset from the time stamp itself, as resetCurrentTimeStamp() + // does; deriving it from the old index only realigns while itsOffset is 0. + itsOffset = - (begin % itsNrTimesPerPacket); + startI = mapTime2Index(begin); + + { + ScopedLock sl(itsValidDataMutex); + itsValidData.reset(); + } + } + + //LOG_DEBUG_STR("timestamp = " << (uint64_t) begin << ", itsOffset = " << itsOffset); + } + + unsigned endI = startI + itsNrTimesPerPacket; + + if (endI >= itsSize) + endI -= itsSize; + + itsPreviousTimeStamp = end; + itsPreviousI = endI; + + // in synchronous mode, do not overrun tail of reader + if (!itsIsRealTime) + itsSynchronizedReaderWriter->startWrite(begin, end); + + // do not write in circular buffer section that is being read + itsLockedRanges.lock(startI, endI); + + writePacket(itsSBBuffers[0][startI].origin(), data); + + // forget old ValidData + { + ScopedLock sl(itsValidDataMutex); + itsValidData.exclude(0, end - itsSize); + + unsigned rangesSize = itsValidData.getRanges().size(); + + // add new ValidData (except if range list will grow too long, to avoid long + // computations) + if (rangesSize < 64 || itsValidData.getRanges()[rangesSize - 1].end == begin) + itsValidData.include(begin, end); + } + + itsLockedRanges.unlock(startI, endI); + + if (!itsIsRealTime) + itsSynchronizedReaderWriter->finishedWrite(end); + + itsWriteTimer.stop(); +} + + +template<typename SAMPLE_TYPE> 
void BeamletBuffer<SAMPLE_TYPE>::startReadTransaction(const std::vector<TimeStamp> &begin, unsigned nrElements) +{ + // in synchronous mode, do not overrun writer + if (!itsIsRealTime) { + TimeStamp minBegin = *std::min_element(begin.begin(), begin.end()); + TimeStamp maxEnd = *std::max_element(begin.begin(), begin.end()) + nrElements; + itsSynchronizedReaderWriter->startRead(minBegin, maxEnd); + } + + itsReadMutex.lock(); // only one reader per BeamletBuffer allowed + itsReadTimer.start(); + + unsigned nrBeams = begin.size(); + + itsEnd.resize(nrBeams); + itsStartI.resize(nrBeams); + itsEndI.resize(nrBeams); + + itsBegin = begin; + + for (unsigned beam = 0; beam < begin.size(); beam ++) { + itsEnd[beam] = begin[beam] + nrElements; + itsStartI[beam] = mapTime2Index(begin[beam]); + itsEndI[beam] = mapTime2Index(itsEnd[beam]); + } + + itsMinEnd = *std::min_element(itsEnd.begin(), itsEnd.end()); + itsMinStartI = *std::min_element(itsStartI.begin(), itsStartI.end()); + itsMaxEndI = *std::max_element(itsEndI.begin(), itsEndI.end()); + + // do not read from circular buffer section that is being written + itsLockedRanges.lock(itsMinStartI, itsMaxEndI); +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::sendSubband(Stream *str, unsigned subband, unsigned beam) const +{ + // Align to 32 bytes and make multiple of 32 bytes by prepending/appending + // extra data. Always send 32 bytes extra, even if data was already aligned. 
+ unsigned startI = align(itsStartI[beam] - itsAlignment + 1, itsAlignment); // round down + unsigned endI = align(itsEndI[beam] + 1, itsAlignment); // round up, possibly adding 32 bytes + + if (endI < startI) { + // the data wraps around the allocated memory, so copy in two parts + unsigned firstChunk = itsSize - startI; + + str->write(itsSBBuffers[subband][startI].origin(), sizeof(SAMPLE_TYPE[firstChunk][NR_POLARIZATIONS])); + str->write(itsSBBuffers[subband][0].origin(), sizeof(SAMPLE_TYPE[endI][NR_POLARIZATIONS])); + } else { + str->write(itsSBBuffers[subband][startI].origin(), sizeof(SAMPLE_TYPE[endI - startI][NR_POLARIZATIONS])); + } +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::sendUnalignedSubband(Stream *str, unsigned subband, unsigned beam) const +{ + if (itsEndI[beam] < itsStartI[beam]) { + // the data wraps around the allocated memory, so copy in two parts + unsigned firstChunk = itsSize - itsStartI[beam]; + + str->write(itsSBBuffers[subband][itsStartI[beam]].origin(), sizeof(SAMPLE_TYPE[firstChunk][NR_POLARIZATIONS])); + str->write(itsSBBuffers[subband][0].origin(), sizeof(SAMPLE_TYPE[itsEndI[beam]][NR_POLARIZATIONS])); + } else { + str->write(itsSBBuffers[subband][itsStartI[beam]].origin(), sizeof(SAMPLE_TYPE[itsEndI[beam] - itsStartI[beam]][NR_POLARIZATIONS])); + } +} + + +// Returns the flagged (missing) samples of the current read window of `beam`, as +// indices relative to itsBegin[beam]: the whole window minus every valid range. +template<typename SAMPLE_TYPE> SparseSet<unsigned> BeamletBuffer<SAMPLE_TYPE>::readFlags(unsigned beam) +{ + SparseSet<TimeStamp> validTimes; + + { + // scoped lock, so that the mutex is released even if subset() throws + ScopedLock sl(itsValidDataMutex); + validTimes = itsValidData.subset(itsBegin[beam], itsEnd[beam]); + } + + SparseSet<unsigned> flags; + flags.include(0, static_cast<unsigned>(itsEnd[beam] - itsBegin[beam])); + + for (SparseSet<TimeStamp>::const_iterator it = validTimes.getRanges().begin(); it != validTimes.getRanges().end(); it ++) + flags.exclude(static_cast<unsigned>(it->begin - itsBegin[beam]), + static_cast<unsigned>(it->end - itsBegin[beam])); + + return flags; +} + + +template<typename SAMPLE_TYPE> void 
BeamletBuffer<SAMPLE_TYPE>::stopReadTransaction() +{ + itsLockedRanges.unlock(itsMinStartI, itsMaxEndI); + + if (!itsIsRealTime) + itsSynchronizedReaderWriter->finishedRead(itsMinEnd - (itsHistorySize + 16)); + // subtract 16 extra; due to alignment restrictions and the changing delays, + // it is hard to predict where the next read will begin. + + itsReadTimer.stop(); + itsReadMutex.unlock(); +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::noMoreReading() +{ + if (!itsIsRealTime) + itsSynchronizedReaderWriter->noMoreReading(); +} + + +template<typename SAMPLE_TYPE> void BeamletBuffer<SAMPLE_TYPE>::noMoreWriting() +{ + if (!itsIsRealTime) + itsSynchronizedReaderWriter->noMoreWriting(); +} + + +template class BeamletBuffer<i4complex>; +template class BeamletBuffer<i8complex>; +template class BeamletBuffer<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/BeamletBuffer.h b/RTCP/GPUProc/src/BeamletBuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..625b52cea8476f5386af5de8eddb311d6a3980d3 --- /dev/null +++ b/RTCP/GPUProc/src/BeamletBuffer.h @@ -0,0 +1,133 @@ +//# BeamletBuffer.h: a cyclic buffer that holds the beamlets from the rspboards +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: BeamletBuffer.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_BEAMLET_BUFFER_H +#define LOFAR_GPUPROC_BEAMLET_BUFFER_H + +// \file +// a cyclic buffer that holds the beamlets from the rspboards + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +//# Includes +#include <Common/lofar_vector.h> +#include <Common/lofar_complex.h> +#include <Common/Timer.h> +#include <Interface/Config.h> +#include <Interface/MultiDimArray.h> +#include <Interface/Parset.h> +#include <Interface/RSPTimeStamp.h> +#include <Interface/SmartPtr.h> +#include <Interface/SparseSet.h> +#include <LockedRanges.h> +#include <ReaderWriterSynchronization.h> +#include <Stream/Stream.h> +#include <Common/Thread/Mutex.h> + +#include <vector> +#include <string> + + +namespace LOFAR { +namespace RTCP { + +// define a "simple" type of which the size equals the size of two samples +// (X and Y polarizations) + + +template<typename SAMPLE_TYPE> class BeamletBuffer +{ + public: + BeamletBuffer(const Parset *, std::string &stationName, unsigned rspBoard); + + void writePacketData(const SAMPLE_TYPE *data, const TimeStamp &begin); + void writeMultiplePackets(const void *rspData, const std::vector<TimeStamp> &); + + void startReadTransaction(const std::vector<TimeStamp> &begin, unsigned nrElements); + void sendSubband(Stream *, unsigned subband, unsigned currentBeam) const; + void sendUnalignedSubband(Stream *, unsigned subband, unsigned currentBeam) const; + unsigned alignmentShift(unsigned beam) const; + SparseSet<unsigned> readFlags(unsigned beam); + void stopReadTransaction(); + + void noMoreReading(); + void noMoreWriting(); + + const static unsigned itsNrTimesPerPacket = 16; + + private: + unsigned mapTime2Index(TimeStamp time) const; + + 
std::string itsLogPrefix; + + Mutex itsValidDataMutex; + SparseSet<TimeStamp> itsValidData; + unsigned itsRSPboard; + unsigned itsNrSubbands; + size_t itsPacketSize; + unsigned itsSize, itsHistorySize; + bool itsIsRealTime; + SmartPtr<SynchronizedReaderAndWriter> itsSynchronizedReaderWriter; + LockedRanges itsLockedRanges; + Cube<SAMPLE_TYPE> itsSBBuffers; + int itsOffset; + const static unsigned itsAlignment = 32 / (NR_POLARIZATIONS * sizeof(SAMPLE_TYPE)); + + // read internals + std::vector<TimeStamp> itsBegin, itsEnd; + std::vector<size_t> itsStartI, itsEndI; + size_t itsMinStartI, itsMaxEndI; + TimeStamp itsMinEnd; + Mutex itsReadMutex; + + // write internals + void writePacket(SAMPLE_TYPE *dst, const SAMPLE_TYPE *src); + void updateValidData(const TimeStamp &begin, const TimeStamp &end); + void writeConsecutivePackets(unsigned count); + void resetCurrentTimeStamp(const TimeStamp &); + + TimeStamp itsPreviousTimeStamp; + unsigned itsPreviousI; + TimeStamp itsCurrentTimeStamp; + unsigned itsCurrentI; + size_t itsStride; + const char *itsCurrentPacketPtr; + + NSTimer itsReadTimer, itsWriteTimer; +}; + + +template<typename SAMPLE_TYPE> inline unsigned BeamletBuffer<SAMPLE_TYPE>::alignmentShift(unsigned beam) const +{ + return itsStartI[beam] % itsAlignment; +} + +template<typename SAMPLE_TYPE> inline unsigned BeamletBuffer<SAMPLE_TYPE>::mapTime2Index(TimeStamp time) const +{ + // TODO: this is very slow because of the % + return (time + itsOffset) % itsSize; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/BeamletBufferToComputeNode.cc b/RTCP/GPUProc/src/BeamletBufferToComputeNode.cc new file mode 100644 index 0000000000000000000000000000000000000000..22efb3626d037028d4b21eb14a38debc81adc758 --- /dev/null +++ b/RTCP/GPUProc/src/BeamletBufferToComputeNode.cc @@ -0,0 +1,452 @@ +//# BeamletBufferToComputeNode.cc: Catch RSP ethernet frames and synchronize RSP inputs +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation 
for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: BeamletBufferToComputeNode.cc 18226 2011-06-09 12:56:47Z romein $ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +//# Includes +#include <Common/Timer.h> +#include <Common/PrettyUnits.h> +#include <BeamletBufferToComputeNode.h> +#include <BeamletBuffer.h> +#include <ION_Allocator.h> +#include <Scheduling.h> +#include <Interface/AlignedStdAllocator.h> +#include <Interface/BFRawFormat.h> +#include <Interface/CN_Command.h> +#include <Interface/CN_Mapping.h> +#include <Interface/Stream.h> +#include <Interface/SubbandMetaData.h> +#include <Interface/Exceptions.h> + +#include <sys/time.h> + +#include <cstdio> +#include <stdexcept> + +#include <boost/format.hpp> + + +namespace LOFAR { +namespace RTCP { + + +template<typename SAMPLE_TYPE> const unsigned BeamletBufferToComputeNode<SAMPLE_TYPE>::itsMaximumDelay; + + +template<typename SAMPLE_TYPE> BeamletBufferToComputeNode<SAMPLE_TYPE>::BeamletBufferToComputeNode(const Parset &ps, const std::vector<Stream *> &phaseOneTwoStreams, const std::vector<SmartPtr<BeamletBuffer<SAMPLE_TYPE> > > &beamletBuffers, unsigned psetNumber) +: + itsFileHeaderWritten(false), + 
itsPhaseOneTwoStreams(phaseOneTwoStreams), + itsPS(ps), + itsNrInputs(beamletBuffers.size()), + itsPsetNumber(psetNumber), + itsBeamletBuffers(beamletBuffers), + itsBlockNumber(0), + itsDelayTimer("delay consumer", true, true) +{ + bool haveStationInput = itsNrInputs > 0; + string stationName = haveStationInput ? ps.getStationNamesAndRSPboardNumbers(psetNumber)[0].station : "none"; // TODO: support more than one station + + itsLogPrefix = str(boost::format("[obs %u station %s] ") % ps.observationID() % stationName); + + itsSubbandBandwidth = ps.subbandBandwidth(); + itsNrSubbands = ps.nrSubbands(); + itsNrSubbandsPerPset = ps.nrSubbandsPerPset(); + itsNrSamplesPerSubband = ps.nrSamplesPerSubband(); + itsNrBeams = ps.nrBeams(); + itsNrTABs = ps.nrTABs(); + itsNrPhaseTwoPsets = ps.phaseTwoPsets().size(); + itsCurrentPhaseOneTwoComputeCore = 0; + itsSampleDuration = ps.sampleDuration(); + itsDelayCompensation = ps.delayCompensation(); + itsCorrectClocks = ps.correctClocks(); + itsNeedDelays = (itsDelayCompensation || itsNrTABs > 1 || itsCorrectClocks) && itsNrInputs > 0; + itsSubbandToSAPmapping = ps.subbandToSAPmapping(); + + if (haveStationInput) { + itsSubbandToRSPboardMapping = ps.subbandToRSPboardMapping(stationName); + itsSubbandToRSPslotMapping = ps.subbandToRSPslotMapping(stationName); + } + + itsCurrentTimeStamp = TimeStamp(static_cast<int64>(ps.startTime() * itsSubbandBandwidth), ps.clockSpeed()); + itsIsRealTime = ps.realTime(); + itsMaxNetworkDelay = ps.maxNetworkDelay(); + itsDumpRawData = ps.dumpRawData(); + itsNrHistorySamples = itsDumpRawData ? 
0 : ps.nrHistorySamples(); + itsObservationID = ps.observationID(); + + LOG_DEBUG_STR(itsLogPrefix << "nrSubbands = " << itsNrSubbands); + LOG_DEBUG_STR(itsLogPrefix << "nrChannelsPerSubband = " << ps.nrChannelsPerSubband()); + LOG_DEBUG_STR(itsLogPrefix << "nrStations = " << ps.nrStations()); + LOG_DEBUG_STR(itsLogPrefix << "nrBitsPerSample = " << ps.nrBitsPerSample()); + LOG_DEBUG_STR(itsLogPrefix << "maxNetworkDelay = " << itsMaxNetworkDelay << " samples"); + + if (haveStationInput && itsNeedDelays) { + itsDelaysAtBegin.resize(itsNrBeams, itsNrTABs + 1); + itsDelaysAfterEnd.resize(itsNrBeams, itsNrTABs + 1); + itsBeamDirectionsAtBegin.resize(itsNrBeams, itsNrTABs + 1); + itsBeamDirectionsAfterEnd.resize(itsNrBeams, itsNrTABs + 1); + + if (itsDelayCompensation || itsNrTABs > 1) + itsDelays = new Delays(ps, stationName, itsCurrentTimeStamp); + + if (itsCorrectClocks) + itsClockCorrectionTime = ps.clockCorrectionTime(stationName); + + computeNextDelays(); // initialize itsDelaysAfterEnd before we really start + } + + itsDelayedStamps.resize(itsNrBeams); + itsSamplesDelay.resize(itsNrBeams); + itsFineDelaysAtBegin.resize(itsNrBeams, itsNrTABs + 1); + itsFineDelaysAfterEnd.resize(itsNrBeams, itsNrTABs + 1); + itsFlags.resize(boost::extents[itsNrInputs][itsNrBeams]); + + if (itsDumpRawData && itsNrInputs > 0) { + LOG_INFO_STR(itsLogPrefix << "Dumping raw beamformed data only, no further processing done"); + + vector<string> rawDataOutputs = ps.getStringVector("OLAP.OLAP_Conn.rawDataOutputs",true); + unsigned psetIndex = ps.phaseOnePsetIndex(itsPsetNumber); + + if (psetIndex >= rawDataOutputs.size()) + THROW(IONProcException, "there are more input section nodes than entries in OLAP.OLAP_Conn.rawDataOutputs"); + + string rawDataOutput = rawDataOutputs[psetIndex]; + LOG_INFO_STR(itsLogPrefix << "Writing raw data to " << rawDataOutput); + itsRawDataStream = createStream(rawDataOutput, false); + } + +#if defined HAVE_BGP_ION // FIXME: not in preprocess + doNotRunOnCore0(); 
+ setPriority(3); +#endif +} + + +template<typename SAMPLE_TYPE> BeamletBufferToComputeNode<SAMPLE_TYPE>::~BeamletBufferToComputeNode() +{ + LOG_DEBUG_STR(itsLogPrefix << "BeamletBufferToComputeNode::~BeamletBufferToComputeNode"); + + for (unsigned rsp = 0; rsp < itsNrInputs; rsp ++) + itsBeamletBuffers[rsp]->noMoreReading(); +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::computeNextDelays() +{ + // track source + + if (itsDelays != 0) + itsDelays->getNextDelays(itsBeamDirectionsAfterEnd, itsDelaysAfterEnd); + else + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + for (unsigned pencil = 0; pencil < itsNrTABs + 1; pencil ++) + itsDelaysAfterEnd[beam][pencil] = 0; + + // apply clock correction due to cable differences + + if (itsCorrectClocks) + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + for (unsigned pencil = 0; pencil < itsNrTABs + 1; pencil ++) + itsDelaysAfterEnd[beam][pencil] += itsClockCorrectionTime; +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::limitFlagsLength(SparseSet<unsigned> &flags) +{ + const SparseSet<unsigned>::Ranges &ranges = flags.getRanges(); + + if (ranges.size() > 16) + flags.include(ranges[15].begin, ranges.back().end); +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::computeDelays() +{ + itsDelayTimer.start(); + + // begin of this integration is end of previous integration + itsDelaysAtBegin = itsDelaysAfterEnd; + itsBeamDirectionsAtBegin = itsBeamDirectionsAfterEnd; + + computeNextDelays(); + + for (unsigned beam = 0; beam < itsNrBeams; beam ++) { + // The coarse delay is determined for the center of the current + // time interval and is expressed in an entire amount of samples. + // + // We use the central pencil beam (#0) for the coarse delay compensation. 
+ signed int coarseDelay = static_cast<signed int>(floor(0.5 * (itsDelaysAtBegin[beam][0] + itsDelaysAfterEnd[beam][0]) * itsSubbandBandwidth + 0.5)); + + // The fine delay is determined for the boundaries of the current + // time interval and is expressed in seconds. + double d = coarseDelay * itsSampleDuration; + + itsDelayedStamps[beam] -= coarseDelay; + itsSamplesDelay[beam] = -coarseDelay; + + for (unsigned pencil = 0; pencil < itsNrTABs + 1; pencil ++) { + // we don't do coarse delay compensation for the individual pencil beams to avoid complexity and overhead + itsFineDelaysAtBegin[beam][pencil] = static_cast<float>(itsDelaysAtBegin[beam][pencil] - d); + itsFineDelaysAfterEnd[beam][pencil] = static_cast<float>(itsDelaysAfterEnd[beam][pencil] - d); + } + } + + itsDelayTimer.stop(); +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::startTransaction() +{ + for (unsigned rsp = 0; rsp < itsNrInputs; rsp ++) { + itsBeamletBuffers[rsp]->startReadTransaction(itsDelayedStamps, itsNrSamplesPerSubband + itsNrHistorySamples); + + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + /*if (itsMustComputeFlags[rsp][beam])*/ { // TODO + itsFlags[rsp][beam] = itsBeamletBuffers[rsp]->readFlags(beam); + limitFlagsLength(itsFlags[rsp][beam]); + } + } +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::writeLogMessage() const +{ + std::stringstream logStr; + + logStr << itsLogPrefix << itsCurrentTimeStamp; + + if (itsIsRealTime) { + struct timeval tv; + + gettimeofday(&tv, 0); + + double currentTime = tv.tv_sec + tv.tv_usec / 1e6; + double expectedTime = itsCorrelationStartTime * itsSampleDuration; + + logStr << ", late: " << PrettyTime(currentTime - expectedTime); + } + + if (itsNeedDelays) { + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + logStr << (beam == 0 ? ", delays: [" : ", ") << PrettyTime(itsDelaysAtBegin[beam][0], 7); + //logStr << (beam == 0 ? 
", delays: [" : ", ") << PrettyTime(itsDelaysAtBegin[beam], 7) << " = " << itsSamplesDelay[beam] << " samples + " << PrettyTime(itsFineDelaysAtBegin[beam], 7); + + logStr << "]"; + } + + for (unsigned rsp = 0; rsp < itsNrInputs; rsp ++) + logStr << ", flags " << rsp << ": " << itsFlags[rsp][0] << '(' << std::setprecision(3) << (100.0 * itsFlags[rsp][0].count() / (itsNrSamplesPerSubband + itsNrHistorySamples)) << "%)"; // not really correct; beam(0) may be shifted + + LOG_INFO(logStr.str()); +} + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::toComputeNodes() +{ + CN_Command command(CN_Command::PROCESS, itsBlockNumber ++); + + if (!itsPhaseOneTwoStreams.empty()) { + // If the total number of subbands is not dividable by the nrSubbandsPerPset, + // we may have to send dummy process commands, without sending subband data. + + for (unsigned subbandBase = 0; subbandBase < itsNrSubbandsPerPset; subbandBase ++) { + Stream *stream = itsPhaseOneTwoStreams[itsCurrentPhaseOneTwoComputeCore]; + + // tell CN to process data +#if defined CLUSTER_SCHEDULING + if (itsPsetNumber == 0) +#endif + command.write(stream); + + if (itsNrInputs > 0) { + // create and send all metadata in one "large" message, since initiating a message + // has significant overhead in FCNP. 
+ SubbandMetaData metaData(itsNrPhaseTwoPsets, itsNrTABs + 1); + + for (unsigned psetIndex = 0; psetIndex < itsNrPhaseTwoPsets; psetIndex ++) { + unsigned subband = itsNrSubbandsPerPset * psetIndex + subbandBase; + + if (subband < itsNrSubbands) { + unsigned rspBoard = itsSubbandToRSPboardMapping[subband]; + unsigned beam = itsSubbandToSAPmapping[subband]; + + if (itsNeedDelays) { + for (unsigned p = 0; p < itsNrTABs + 1; p ++) { + struct SubbandMetaData::beamInfo &beamInfo = metaData.beams(psetIndex)[p]; + + beamInfo.delayAtBegin = itsFineDelaysAtBegin[beam][p]; + beamInfo.delayAfterEnd = itsFineDelaysAfterEnd[beam][p]; + + // extract the carthesian coordinates + const casa::Vector<double> &beamDirBegin = itsBeamDirectionsAtBegin[beam][p].getValue(); + const casa::Vector<double> &beamDirEnd = itsBeamDirectionsAfterEnd[beam][p].getValue(); + + for (unsigned i = 0; i < 3; i ++) { + beamInfo.beamDirectionAtBegin[i] = beamDirBegin[i]; + beamInfo.beamDirectionAfterEnd[i] = beamDirEnd[i]; + } + } + } + + metaData.alignmentShift(psetIndex) = itsBeamletBuffers[rspBoard]->alignmentShift(beam); + metaData.setFlags(psetIndex, itsFlags[rspBoard][beam]); + } + } + + metaData.write(stream); + + // now send all subband data + for (unsigned psetIndex = 0; psetIndex < itsNrPhaseTwoPsets; psetIndex ++) { + unsigned subband = itsNrSubbandsPerPset * psetIndex + subbandBase; + + if (subband < itsNrSubbands) { + unsigned rspBoard = itsSubbandToRSPboardMapping[subband]; + unsigned rspSlot = itsSubbandToRSPslotMapping[subband]; + unsigned beam = itsSubbandToSAPmapping[subband]; + + itsBeamletBuffers[rspBoard]->sendSubband(stream, rspSlot, beam); + } + } + } + + if (++ itsCurrentPhaseOneTwoComputeCore == itsPhaseOneTwoStreams.size()) + itsCurrentPhaseOneTwoComputeCore = 0; + } + } +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::dumpRawData() +{ + // NOTE: we always have station input at this point + + std::string stationName = 
itsPS.getStationNamesAndRSPboardNumbers(itsPsetNumber)[0].station; // TODO: support more than one station + + vector<unsigned> subbandToSAPmapping = itsPS.subbandToSAPmapping(); + vector<unsigned> subbandToRSPboardMapping = itsPS.subbandToRSPboardMapping(stationName); + vector<unsigned> subbandToRSPslotMapping = itsPS.subbandToRSPslotMapping(stationName); + unsigned nrSubbands = itsPS.nrSubbands(); + BFRawFormat bfraw_data; + + if (!itsFileHeaderWritten) { + if (nrSubbands > 62) + THROW(IONProcException, "too many subbands for raw data format"); + + memset(&bfraw_data.header, 0, sizeof bfraw_data.header); + + bfraw_data.header.magic = 0x3F8304EC; + bfraw_data.header.bitsPerSample = 16; + bfraw_data.header.nrPolarizations = 2; + bfraw_data.header.nrSubbands = nrSubbands; + bfraw_data.header.nrSamplesPerSubband = itsNrSamplesPerSubband; + bfraw_data.header.subbandBandwidth = itsSubbandBandwidth; + + strncpy(bfraw_data.header.station, itsPS.getStationNamesAndRSPboardNumbers(itsPsetNumber)[0].station.c_str(), sizeof bfraw_data.header.station); + memcpy(bfraw_data.header.subbandFrequencies, &itsPS.subbandToFrequencyMapping()[0], nrSubbands * sizeof(double)); + + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + memcpy(bfraw_data.header.beamDirections[beam], &itsPS.getBeamDirection(beam)[0], sizeof bfraw_data.header.beamDirections[beam]); + + for (unsigned subband = 0; subband < nrSubbands; subband ++) + bfraw_data.header.subbandToSAPmapping[subband] = subbandToSAPmapping[subband]; + + itsRawDataStream->write(&bfraw_data.header, sizeof bfraw_data.header); + itsFileHeaderWritten = true; + } + + memset(&bfraw_data.block_header, 0, sizeof bfraw_data.block_header); + + bfraw_data.block_header.magic = 0x2913D852; + + for (unsigned beam = 0; beam < itsNrBeams; beam ++) { + bfraw_data.block_header.coarseDelayApplied[beam] = itsSamplesDelay[beam]; + bfraw_data.block_header.fineDelayRemainingAtBegin[beam] = itsFineDelaysAtBegin[beam][0]; + 
bfraw_data.block_header.fineDelayRemainingAfterEnd[beam] = itsFineDelaysAfterEnd[beam][0]; + bfraw_data.block_header.time[beam] = itsDelayedStamps[beam]; + + // FIXME: the current BlockHeader format does not provide space for + // the flags from multiple RSP boards --- use the flags of RSP board 0 + itsFlags[0][beam].marshall(reinterpret_cast<char *>(&bfraw_data.block_header.flags[beam]), sizeof(BFRawFormat::BlockHeader::marshalledFlags)); + } + + itsRawDataStream->write(&bfraw_data.block_header, sizeof bfraw_data.block_header); + + for (unsigned subband = 0; subband < nrSubbands; subband ++) + itsBeamletBuffers[subbandToRSPboardMapping[subband]]->sendUnalignedSubband(itsRawDataStream, subbandToRSPslotMapping[subband], subbandToSAPmapping[subband]); +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::stopTransaction() +{ + for (unsigned rsp = 0; rsp < itsNrInputs; rsp ++) + itsBeamletBuffers[rsp]->stopReadTransaction(); +} + + +template<typename SAMPLE_TYPE> void BeamletBufferToComputeNode<SAMPLE_TYPE>::process() +{ + // stay in sync with other psets even if there are no inputs to allow a synchronised early abort + + if (itsNrInputs > 0) + for (unsigned beam = 0; beam < itsNrBeams; beam ++) + itsDelayedStamps[beam] = itsCurrentTimeStamp - itsNrHistorySamples; + + if (itsNeedDelays) + computeDelays(); + + if (itsIsRealTime) { + itsCorrelationStartTime = itsCurrentTimeStamp + itsNrSamplesPerSubband + itsMaxNetworkDelay + itsMaximumDelay; + + itsWallClock.waitUntil(itsCorrelationStartTime); + } + + if (itsNrInputs > 0) { + startTransaction(); + writeLogMessage(); + } + + NSTimer timer; + timer.start(); + + if (!itsDumpRawData) + toComputeNodes(); + else if (itsNrInputs > 0) + dumpRawData(); + + if (itsNrInputs > 0) { + stopTransaction(); + } + + itsCurrentTimeStamp += itsNrSamplesPerSubband; + timer.stop(); + + if (itsNrInputs > 0) + LOG_DEBUG_STR(itsLogPrefix << " ION->CN: " << PrettyTime(timer.getElapsed())); +} + +template class 
BeamletBufferToComputeNode<i4complex>; +template class BeamletBufferToComputeNode<i8complex>; +template class BeamletBufferToComputeNode<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/BeamletBufferToComputeNode.h b/RTCP/GPUProc/src/BeamletBufferToComputeNode.h new file mode 100644 index 0000000000000000000000000000000000000000..b3836466563f9a054a5c8a8573f87a76073e454d --- /dev/null +++ b/RTCP/GPUProc/src/BeamletBufferToComputeNode.h @@ -0,0 +1,130 @@ +//# BeamletBufferToComputeNode.h: Catch RSP ethernet frames and synchronize RSP inputs +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: BeamletBufferToComputeNode.h 17893 2011-04-29 09:04:10Z romein $ + +#ifndef LOFAR_IONPROC_BEAMLET_BUFFER_TO_COMPUTE_NODE_H +#define LOFAR_IONPROC_BEAMLET_BUFFER_TO_COMPUTE_NODE_H + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! 
+ +//# Includes +#include <Interface/MultiDimArray.h> +#include <Interface/Parset.h> +#include <Interface/RSPTimeStamp.h> +#include <Interface/SmartPtr.h> +#include <Stream/Stream.h> +#include <BeamletBuffer.h> +#include <Delays.h> + +#include <casa/Quanta/MVDirection.h> + +#include <boost/multi_array.hpp> +#include <pthread.h> + +#include <string> +#include <vector> + + +namespace LOFAR { +namespace RTCP { + +template <typename SAMPLE_TYPE> class BeamletBufferToComputeNode { + public: + BeamletBufferToComputeNode(const Parset &ps, const std::vector<Stream *> &phaseOneTwoStreams, const std::vector<SmartPtr<BeamletBuffer<SAMPLE_TYPE> > > &beamletBuffers, unsigned psetNumber); + ~BeamletBufferToComputeNode(); + + void process(); + + TimeStamp getCurrentTimeStamp() const { return itsCurrentTimeStamp; } + + private: + static void limitFlagsLength(SparseSet<unsigned> &flags); + + void computeDelays(), computeNextDelays(); + + void startTransaction(); + void writeLogMessage() const; + void toComputeNodes(); + void stopTransaction(); + + void dumpRawData(); + SmartPtr<Stream> itsRawDataStream; + bool itsFileHeaderWritten; + + std::string itsLogPrefix; + + bool itsDelayCompensation; + bool itsCorrectClocks; + bool itsNeedDelays; + bool itsIsRealTime; + bool itsDumpRawData; + std::vector<unsigned> itsSubbandToSAPmapping; + std::vector<unsigned> itsSubbandToRSPboardMapping; + std::vector<unsigned> itsSubbandToRSPslotMapping; + + const std::vector<Stream *> &itsPhaseOneTwoStreams; + + const Parset &itsPS; + + TimeStamp itsCurrentTimeStamp; + + Matrix<double> itsDelaysAtBegin; + Matrix<double> itsDelaysAfterEnd; + Matrix<casa::MVDirection> itsBeamDirectionsAtBegin; + Matrix<casa::MVDirection> itsBeamDirectionsAfterEnd; + unsigned itsNrPhaseTwoPsets; + unsigned itsObservationID; + + unsigned itsMaxNetworkDelay; // in samples + unsigned itsNrSubbands; + unsigned itsNrSubbandsPerPset; + unsigned itsNrSamplesPerSubband; + unsigned itsNrHistorySamples; + unsigned itsNrInputs; + 
unsigned itsNrBeams; + unsigned itsNrTABs; + + unsigned itsCurrentPhaseOneTwoComputeCore; + unsigned itsPsetNumber; + + const std::vector<SmartPtr<BeamletBuffer<SAMPLE_TYPE> > > &itsBeamletBuffers; + unsigned itsBlockNumber; + SmartPtr<Delays> itsDelays; + double itsSubbandBandwidth, itsSampleDuration; + double itsClockCorrectionTime; + + std::vector<TimeStamp> itsDelayedStamps; + std::vector<signed int> itsSamplesDelay; + boost::multi_array<SparseSet<unsigned>, 2> itsFlags; + + Matrix<float> itsFineDelaysAtBegin, itsFineDelaysAfterEnd; + + static const unsigned itsMaximumDelay = 1000; // samples; roughly 1500 km + TimeStamp itsCorrelationStartTime; + WallClockTime itsWallClock; + + NSTimer itsDelayTimer; +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/CL/cl.hpp b/RTCP/GPUProc/src/CL/cl.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5111f37dc8c74a540999ff6a06b9ae7ec07b6455 --- /dev/null +++ b/RTCP/GPUProc/src/CL/cl.hpp @@ -0,0 +1,75024 @@ +#include <iostream> +/******************************************************************************* + * Copyright (c) 2008-2011 The Khronos Group Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and/or associated documentation files (the + * "Materials"), to deal in the Materials without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Materials, and to + * permit persons to whom the Materials are furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Materials. 
+ * + * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. + ******************************************************************************/ + +/*! \file + * + * \brief C++ bindings for OpenCL 1.0 (rev 48) and OpenCL 1.1 (rev 33) + * \author Benedict R. Gaster and Laurent Morichetti + * + * Additions and fixes from Brian Cole, March 3rd 2010. + * + * \version 1.1 + * \date June 2010 + * + * Optional extension support + * + * cl + * cl_ext_device_fission + * #define USE_CL_DEVICE_FISSION + */ + +/*! \mainpage + * \section intro Introduction + * For many large applications C++ is the language of choice and so it seems + * reasonable to define C++ bindings for OpenCL. + * + * + * The interface is contained with a single C++ header file \em cl.hpp and all + * definitions are contained within the namespace \em cl. There is no additional + * requirement to include \em cl.h and to use either the C++ or original C + * bindings it is enough to simply include \em cl.hpp. + * + * The bindings themselves are lightweight and correspond closely to the + * underlying C API. Using the C++ bindings introduces no additional execution + * overhead. 
+ * + * For detail documentation on the bindings see: + * + * The OpenCL C++ Wrapper API 1.1 (revision 04) + * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.1.pdf + * + * \section example Example + * + * The following example shows a general use case for the C++ + * bindings, including support for the optional exception feature and + * also the supplied vector and string classes, see following sections for + * decriptions of these features. + * + * \code + * #define __CL_ENABLE_EXCEPTIONS + * + * #if defined(__APPLE__) || defined(__MACOSX) + * #include <OpenCL/cl.hpp> + * #else + * #include <CL/cl.hpp> + * #endif + * #include <cstdio> + * #include <cstdlib> + * #include <iostream> + * + * const char * helloStr = "__kernel void " + * "hello(void) " + * "{ " + * " " + * "} "; + * + * int + * main(void) + * { + * cl_int err = CL_SUCCESS; + * try { + * + * std::vector<cl::Platform> platforms; + * cl::Platform::get(&platforms); + * if (platforms.size() == 0) { + * std::cout << "Platform size 0\n"; + * return -1; + * } + * + * cl_context_properties properties[] = + * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; + * cl::Context context(CL_DEVICE_TYPE_CPU, properties); + * + * std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); + * + * cl::Program::Sources source(1, + * std::make_pair(helloStr,strlen(helloStr))); + * cl::Program program_ = cl::Program(context, source); + * program_.build(devices); + * + * cl::Kernel kernel(program_, "hello", &err); + * + * cl::Event event; + * cl::CommandQueue queue(context, devices[0], 0, &err); + * queue.enqueueNDRangeKernel( + * kernel, + * cl::NullRange, + * cl::NDRange(4,4), + * cl::NullRange, + * NULL, + * &event); + * + * event.wait(); + * } + * catch (cl::Error err) { + * std::cerr + * << "ERROR: " + * << err.what() + * << "(" + * << err.err() + * << ")" + * << std::endl; + * } + * + * return EXIT_SUCCESS; + * } + * + * \endcode + * + */ +#ifndef CL_HPP_ +#define CL_HPP_ + 
+#ifdef _WIN32 +#include <windows.h> +#include <malloc.h> +#pragma push_macro("max") +#undef max +#if defined(USE_DX_INTEROP) +#include <CL/cl_d3d10.h> +#endif +#endif // _WIN32 + +// +#if defined(USE_CL_DEVICE_FISSION) +#include <CL/cl_ext.h> +#endif + +#if defined(__APPLE__) || defined(__MACOSX) +#include <OpenGL/OpenGL.h> +#include <OpenCL/opencl.h> +#include <libkern/OSAtomic.h> +#else +#include <GL/gl.h> +#include <CL/opencl.h> +#endif // !__APPLE__ + +#if !defined(CL_CALLBACK) +#define CL_CALLBACK +#endif //CL_CALLBACK + +#include <utility> +#include <limits> + +#if !defined(__NO_STD_VECTOR) +#include <vector> +#endif + +#if !defined(__NO_STD_STRING) +#include <string> +#endif + +#if defined(linux) || defined(__APPLE__) || defined(__MACOSX) +# include <alloca.h> +#endif // linux + +#include <cstring> + +#if defined(_WIN32) +#include <functional> +#else +#include <tr1/functional> +#endif + +#include "Common/Exception.h" + +/*! \namespace cl + * + * \brief The OpenCL C++ bindings are defined within this namespace. + * + */ +namespace cl { + +#define __INIT_CL_EXT_FCN_PTR(name) \ + if(!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if(!pfn_##name) { \ + } \ + } + +class Program; +class Device; +class Context; +class CommandQueue; +class Memory; + +#if defined(__CL_ENABLE_EXCEPTIONS) +//#include <exception> +/*! \class Error + * \brief Exception class + */ +//class Error : public std::exception +class Error : public LOFAR::Exception +{ +private: + cl_int err_; + const char * errStr_; +public: + /*! Create a new CL error exception for a given error code + * and corresponding message. + */ + //Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + Error(cl_int err, const char * errStr = NULL) : + LOFAR::Exception(errStr ? errStr : "", THROW_ARGS), + err_(err), errStr_(errStr) + {} + + ~Error() throw() {} + + /*! 
\brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } +}; + +#define __ERR_STR(x) #x +#else +#define __ERR_STR(x) NULL +#endif // __CL_ENABLE_EXCEPTIONS + +//! \cond DOXYGEN_DETAIL +#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) +#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) +#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) +#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) +#define 
__CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) +#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) +#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) +#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) + +#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) +#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) +#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) + +#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) +#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) +#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) +#define 
__ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) +#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) +#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) +#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) + +#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) + +#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) + +#define __FLUSH_ERR __ERR_STR(clFlush) +#define __FINISH_ERR __ERR_STR(clFinish) + +#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) +#endif // __CL_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + +/*! \class string + * \brief Simple string class, that provides a limited subset of std::string + * functionality but avoids many of the issues that come with that class. 
+ */ +class string +{ +private: + ::size_t size_; + char * str_; +public: + string(void) : size_(0), str_(NULL) + { + } + + string(char * str, ::size_t size) : + size_(size), + str_(NULL) + { + str_ = new char[size_+1]; + if (str_ != NULL) { + memcpy(str_, str, size_ * sizeof(char)); + str_[size_] = '\0'; + } + else { + size_ = 0; + } + } + + string(char * str) : + str_(NULL) + { + size_= ::strlen(str); + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, str, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + string& operator=(const string& rhs) + { + if (this == &rhs) { + return *this; + } + + if (rhs.size_ == 0 || rhs.str_ == NULL) { + size_ = 0; + str_ = NULL; + } + else { + size_ = rhs.size_; + str_ = new char[size_ + 1]; + if (str_ != NULL) { + memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); + } + else { + size_ = 0; + } + } + + return *this; + } + + string(const string& rhs) + { + *this = rhs; + } + + ~string() + { + if (str_ != NULL) { + delete[] str_; + } + } + + ::size_t size(void) const { return size_; } + ::size_t length(void) const { return size(); } + + const char * c_str(void) const { return (str_) ? str_ : "";} +}; + +#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) +#include <string> +typedef std::string STRING_CLASS; +#elif !defined(__USE_DEV_STRING) +typedef cl::string STRING_CLASS; +#endif + +#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) +#include <vector> +#define VECTOR_CLASS std::vector +#elif !defined(__USE_DEV_VECTOR) +#define VECTOR_CLASS cl::vector +#endif + +#if !defined(__MAX_DEFAULT_VECTOR_SIZE) +#define __MAX_DEFAULT_VECTOR_SIZE 10 +#endif + +/*! \class vector + * \brief Fixed sized vector implementation that mirroring + * std::vector functionality. 
+ */ +template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE> +class vector +{ +private: + T data_[N]; + unsigned int size_; + bool empty_; +public: + vector() : + size_(static_cast<unsigned int>(-1)), + empty_(true) + {} + + ~vector() {} + + unsigned int size(void) const + { + return size_ + 1; + } + + void clear() + { + size_ = -1; + empty_ = true; + } + + void push_back (const T& x) + { + if (size() < N) { + size_++; + data_[size_] = x; + empty_ = false; + } + } + + void pop_back(void) + { + if (!empty_) { + data_[size_].~T(); + size_--; + if (size_ == -1) { + empty_ = true; + } + } + } + + vector(const vector<T, N>& vec) : + size_(vec.size_), + empty_(vec.empty_) + { + if (!empty_) { + memcpy(&data_[0], &vec.data_[0], size() * sizeof(T)); + } + } + + vector(unsigned int size, const T& val = T()) : + size_(-1), + empty_(true) + { + for (unsigned int i = 0; i < size; i++) { + push_back(val); + } + } + + vector<T, N>& operator=(const vector<T, N>& rhs) + { + if (this == &rhs) { + return *this; + } + + size_ = rhs.size_; + empty_ = rhs.empty_; + + if (!empty_) { + memcpy(&data_[0], &rhs.data_[0], size() * sizeof(T)); + } + + return *this; + } + + bool operator==(vector<T,N> &vec) + { + if (empty_ && vec.empty_) { + return true; + } + + if (size() != vec.size()) { + return false; + } + + return memcmp(&data_[0], &vec.data_[0], size() * sizeof(T)) == 0 ? true : false; + } + + operator T* () { return data_; } + operator const T* () const { return data_; } + + bool empty (void) const + { + return empty_; + } + + unsigned int max_size (void) const + { + return N; + } + + unsigned int capacity () const + { + return sizeof(T) * N; + } + + T& operator[](int index) + { + return data_[index]; + } + + T operator[](int index) const + { + return data_[index]; + } + + template<class I> + void assign(I start, I end) + { + clear(); + while(start < end) { + push_back(*start); + start++; + } + } + + /*! 
\class iterator + * \brief Iterator class for vectors + */ + class iterator + { + private: + vector<T,N> vec_; + int index_; + bool initialized_; + public: + iterator(void) : + index_(-1), + initialized_(false) + { + index_ = -1; + initialized_ = false; + } + + ~iterator(void) {} + + static iterator begin(vector<T,N> &vec) + { + iterator i; + + if (!vec.empty()) { + i.index_ = 0; + } + + i.vec_ = vec; + i.initialized_ = true; + return i; + } + + static iterator end(vector<T,N> &vec) + { + iterator i; + + if (!vec.empty()) { + i.index_ = vec.size(); + } + i.vec_ = vec; + i.initialized_ = true; + return i; + } + + bool operator==(iterator i) + { + return ((vec_ == i.vec_) && + (index_ == i.index_) && + (initialized_ == i.initialized_)); + } + + bool operator!=(iterator i) + { + return (!(*this==i)); + } + + void operator++() + { + index_++; + } + + void operator++(int x) + { + index_ += x; + } + + void operator--() + { + index_--; + } + + void operator--(int x) + { + index_ -= x; + } + + T operator *() + { + return vec_[index_]; + } + }; + + iterator begin(void) + { + return iterator::begin(*this); + } + + iterator end(void) + { + return iterator::end(*this); + } + + T& front(void) + { + return data_[0]; + } + + T& back(void) + { + return data_[size_]; + } + + const T& front(void) const + { + return data_[0]; + } + + const T& back(void) const + { + return data_[size_]; + } +}; + +namespace detail { +#define __DEFAULT_NOT_INITIALIZED 1 +#define __DEFAULT_BEING_INITIALIZED 2 +#define __DEFAULT_INITIALIZED 4 + + /* + * Compare and exchange primitives are needed for handling of defaults + */ + inline int compare_exchange(volatile int * dest, int exchange, int comparand) + { +#ifdef _WIN32 + return (int)(InterlockedCompareExchange( + (volatile long*)dest, + (long)exchange, + (long)comparand)); +#elif defined(__APPLE__) || defined(__MACOSX) + return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest); +#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX) 
+ return (__sync_val_compare_and_swap( + dest, + comparand, + exchange)); +#endif // !_WIN32 + } +}; // namespace detail + + +/*! + * \brief size_t class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, who's + * size is known statically. + */ +template <int N> +struct size_t : public cl::vector< ::size_t, N> { }; + +namespace detail { + +// GetInfo help struct +template <typename Functor, typename T> +struct GetInfoHelper +{ + static cl_int + get(Functor f, cl_uint name, T* param) + { + return f(name, sizeof(T), param, NULL); + } +}; + +// Specialized GetInfoHelper for VECTOR_CLASS params +template <typename Func, typename T> +struct GetInfoHelper<Func, VECTOR_CLASS<T> > +{ + static cl_int get(Func f, cl_uint name, VECTOR_CLASS<T>* param) + { + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + T* value = (T*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(T)]); + return CL_SUCCESS; + } +}; + +// Specialized for getInfo<CL_PROGRAM_BINARIES> +template <typename Func> +struct GetInfoHelper<Func, VECTOR_CLASS<char *> > +{ + static cl_int + get(Func f, cl_uint name, VECTOR_CLASS<char *>* param) + { + ::size_t nDevices; + ::size_t * binary_sizes; + char ** values; + + cl_int err = f(CL_PROGRAM_NUM_DEVICES, sizeof(nDevices), &nDevices, NULL); + if (err != CL_SUCCESS) { + return err; + } + + binary_sizes = (::size_t*)alloca(sizeof(::size_t)*nDevices); + err = f(CL_PROGRAM_BINARY_SIZES, sizeof(::size_t)*nDevices, binary_sizes, NULL); + if (err != CL_SUCCESS) { + return err; + } + + values = (char **) alloca(sizeof(char*)*nDevices); + for(cl_uint i = 0; i < nDevices; i++ ) + { + if( binary_sizes[i] != 0 ) + { + values[i]= (char *)alloca( sizeof(char)*binary_sizes[i]); + } + else + { + values[i] = NULL; + } + } + err = f(name, sizeof(char *)*nDevices, values, NULL); 
+ if (err != CL_SUCCESS) { + return err; + } + + param->assign(values,values+nDevices); + return CL_SUCCESS; + } +}; + +// Specialized GetInfoHelper for STRING_CLASS params +template <typename Func> +struct GetInfoHelper<Func, STRING_CLASS> +{ + static cl_int get(Func f, cl_uint name, STRING_CLASS* param) + { + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + char* value = (char*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + *param = value; + return CL_SUCCESS; + } +}; + +// Specialized GetInfoHelper for cl::size_t params +template <typename Func, ::size_t N> +struct GetInfoHelper<Func, size_t<N> > +{ + static cl_int get(Func f, cl_uint name, size_t<N>* param) + { + ::size_t required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; + } + + ::size_t* value = (::size_t*) alloca(required); + err = f(name, required, value, NULL); + if (err != CL_SUCCESS) { + return err; + } + + param->assign(&value[0], &value[required/sizeof(::size_t)]); + return CL_SUCCESS; + } +}; + +#define __GET_INFO_HELPER_WITH_RETAIN(CPP_TYPE) \ +namespace detail { \ +template <typename Func> \ +struct GetInfoHelper<Func, CPP_TYPE> \ +{ \ + static cl_int get(Func f, cl_uint name, CPP_TYPE* param) \ + { \ + cl_uint err = f(name, sizeof(CPP_TYPE), param, NULL); \ + if (err != CL_SUCCESS) { \ + return err; \ + } \ + \ + return ReferenceHandler<CPP_TYPE::cl_type>::retain((*param)()); \ + } \ +}; \ +} + + +#define __PARAM_NAME_INFO_1_0(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + 
F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, 
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ + F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ + F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void*) 
\ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ + F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ + F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + \ + F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<cl_device_id>) \ + F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + 
F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#if defined(CL_VERSION_1_1) +#define __PARAM_NAME_INFO_1_1(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ + F(cl_device_info, 
CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) +#endif // USE_CL_DEVICE_FISSION + +template <typename enum_type, cl_int Name> +struct param_traits {}; + +#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +struct token; \ +template<> \ +struct param_traits<detail:: token,param_name> \ +{ \ + enum { value = param_name }; \ + typedef T param_type; \ +}; + +__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) +#if defined(CL_VERSION_1_1) +__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) +#endif // CL_VERSION_1_1 + +#if defined(USE_CL_DEVICE_FISSION) +__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); +#endif // USE_CL_DEVICE_FISSION + +#ifdef CL_PLATFORM_ICD_SUFFIX_KHR +__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +#endif + +#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +#endif + +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +#endif +#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +#endif +#ifdef CL_DEVICE_WARP_SIZE_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +#endif +#ifdef CL_DEVICE_GPU_OVERLAP_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +#endif +#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +#endif +#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV +__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +#endif + +// Convenience functions + +template <typename Func, typename T> +inline cl_int +getInfo(Func f, cl_uint name, T* 
param) +{ + return GetInfoHelper<Func, T>::get(f, name, param); +} + +template <typename Func, typename Arg0> +struct GetInfoFunctor0 +{ + Func f_; const Arg0& arg0_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, param, size, value, size_ret); } +}; + +template <typename Func, typename Arg0, typename Arg1> +struct GetInfoFunctor1 +{ + Func f_; const Arg0& arg0_; const Arg1& arg1_; + cl_int operator ()( + cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + { return f_(arg0_, arg1_, param, size, value, size_ret); } +}; + +template <typename Func, typename Arg0, typename T> +inline cl_int +getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) +{ + GetInfoFunctor0<Func, Arg0> f0 = { f, arg0 }; + return GetInfoHelper<GetInfoFunctor0<Func, Arg0>, T> + ::get(f0, name, param); +} + +template <typename Func, typename Arg0, typename Arg1, typename T> +inline cl_int +getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) +{ + GetInfoFunctor1<Func, Arg0, Arg1> f0 = { f, arg0, arg1 }; + return GetInfoHelper<GetInfoFunctor1<Func, Arg0, Arg1>, T> + ::get(f0, name, param); +} + +template<typename T> +struct ReferenceHandler +{ }; + +template <> +struct ReferenceHandler<cl_device_id> +{ + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) + { return CL_INVALID_DEVICE; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) + { return CL_INVALID_DEVICE; } +}; + +template <> +struct ReferenceHandler<cl_platform_id> +{ + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) + { return CL_INVALID_PLATFORM; } + // cl_platform_id does not have release(). 
+ static cl_int release(cl_platform_id) + { return CL_INVALID_PLATFORM; } +}; + +template <> +struct ReferenceHandler<cl_context> +{ + static cl_int retain(cl_context context) + { return ::clRetainContext(context); } + static cl_int release(cl_context context) + { return ::clReleaseContext(context); } +}; + +template <> +struct ReferenceHandler<cl_command_queue> +{ + static cl_int retain(cl_command_queue queue) + { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) + { return ::clReleaseCommandQueue(queue); } +}; + +template <> +struct ReferenceHandler<cl_mem> +{ + static cl_int retain(cl_mem memory) + { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) + { return ::clReleaseMemObject(memory); } +}; + +template <> +struct ReferenceHandler<cl_sampler> +{ + static cl_int retain(cl_sampler sampler) + { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) + { return ::clReleaseSampler(sampler); } +}; + +template <> +struct ReferenceHandler<cl_program> +{ + static cl_int retain(cl_program program) + { return ::clRetainProgram(program); } + static cl_int release(cl_program program) + { return ::clReleaseProgram(program); } +}; + +template <> +struct ReferenceHandler<cl_kernel> +{ + static cl_int retain(cl_kernel kernel) + { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) + { return ::clReleaseKernel(kernel); } +}; + +template <> +struct ReferenceHandler<cl_event> +{ + static cl_int retain(cl_event event) + { return ::clRetainEvent(event); } + static cl_int release(cl_event event) + { return ::clReleaseEvent(event); } +}; + +template <typename T> +class Wrapper +{ +public: + typedef T cl_type; + +protected: + cl_type object_; + +public: + Wrapper() : object_(NULL) { } + + Wrapper(const cl_type &obj) : object_(obj) { } + + ~Wrapper() + { + if (object_ != NULL) { release(); } + } + + Wrapper(const Wrapper<cl_type>& rhs) + { + object_ = rhs.object_; + if 
(object_ != NULL) { retain(); } + } + + Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs) + { + if (object_ != NULL) { release(); } + object_ = rhs.object_; + if (object_ != NULL) { retain(); } + return *this; + } + + Wrapper<cl_type>& operator = (const cl_type &rhs) + { + if (object_ != NULL) { release(); } + object_ = rhs; + return *this; + } + + cl_type operator ()() const { return object_; } + + cl_type& operator ()() { return object_; } + +protected: + + cl_int retain() const + { + return ReferenceHandler<cl_type>::retain(object_); + } + + cl_int release() const + { + return ReferenceHandler<cl_type>::release(object_); + } +}; + +#if defined(__CL_ENABLE_EXCEPTIONS) +static inline cl_int errHandler ( + cl_int err, + const char * errStr = NULL) throw(Error) +{ + if (err != CL_SUCCESS) { + throw Error(err, errStr); + } + return err; +} +#else +static inline cl_int errHandler (cl_int err, const char * errStr = NULL) +{ + return err; +} +#endif // __CL_ENABLE_EXCEPTIONS + +} // namespace detail +//! \endcond + +/*! \stuct ImageFormat + * \brief ImageFormat interface fro cl_image_format. + */ +struct ImageFormat : public cl_image_format +{ + ImageFormat(){} + + ImageFormat(cl_channel_order order, cl_channel_type type) + { + image_channel_order = order; + image_channel_data_type = type; + } + + ImageFormat& operator = (const ImageFormat& rhs) + { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } +}; + +/*! \class Device + * \brief Device interface for cl_device_id. 
+ */ +class Device : public detail::Wrapper<cl_device_id> +{ +public: + Device() : detail::Wrapper<cl_type>() { } + + Device(const Device& device) : detail::Wrapper<cl_type>(device) { } + + Device(const cl_device_id &device) : detail::Wrapper<cl_type>(device) { } + + static Device getDefault(cl_int * err = NULL); + + Device& operator = (const Device& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Device& operator = (const cl_device_id& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_device_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_device_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_device_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(USE_CL_DEVICE_FISSION) + cl_int createSubDevices( + const cl_device_partition_property_ext * properties, + VECTOR_CLASS<Device>* devices) + { + typedef CL_API_ENTRY cl_int + ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + if (err != 
CL_SUCCESS) { + return detail::errHandler(err, __CREATE_SUB_DEVICES); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif +}; + +/*! \class Platform + * \brief Platform interface. + */ +class Platform : public detail::Wrapper<cl_platform_id> +{ +public: + static const Platform null(); + + Platform() : detail::Wrapper<cl_type>() { } + + Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { } + + Platform(const cl_platform_id &platform) : detail::Wrapper<cl_type>(platform) { } + + Platform& operator = (const Platform& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Platform& operator = (const cl_platform_id& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_platform_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_platform_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int getDevices( + cl_device_type type, + VECTOR_CLASS<Device>* devices) const + { + cl_uint n = 0; + cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + +#if defined(USE_DX_INTEROP) + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. 
+ * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is NULL, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. + */ + cl_int getDevices( + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + VECTOR_CLASS<Device>* devices) const + { + typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void * d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id * devices, + cl_uint* num_devices); + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; + __INIT_CL_EXT_FCN_PTR(clGetDeviceIDsFromD3D10KHR); + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + 0, + NULL, + &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, + d3d_device_source, + d3d_object, + d3d_device_set, + n, + ids, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + devices->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } +#endif + + static cl_int get( + VECTOR_CLASS<Platform>* platforms) + { + cl_uint n = 0; + cl_int 
err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + platforms->assign(&ids[0], &ids[n]); + return CL_SUCCESS; + } + + static cl_int get( + Platform * platform) + { + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + *platform = ids[0]; + return CL_SUCCESS; + } + + static Platform get( + cl_int * errResult = NULL) + { + Platform platform; + cl_uint n = 0; + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + if (errResult != NULL) { + *errResult = err; + } + } + + cl_platform_id* ids = (cl_platform_id*) alloca( + n * sizeof(cl_platform_id)); + err = ::clGetPlatformIDs(n, ids, NULL); + + if (err != CL_SUCCESS) { + detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + } + + if (errResult != NULL) { + *errResult = err; + } + + return ids[0]; + } +}; + +static inline cl_int +UnloadCompiler() +{ + return ::clUnloadCompiler(); +} + +class Context : public detail::Wrapper<cl_context> +{ +private: + static volatile int default_initialized_; + static Context default_; + static volatile cl_int default_error_; +public: + Context( + const VECTOR_CLASS<Device>& devices, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateContext( + 
properties, (cl_uint) devices.size(), + (cl_device_id*) &devices.front(), + notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != NULL) { + *err = error; + } + } + + Context( + cl_device_type type, + cl_context_properties* properties = NULL, + void (CL_CALLBACK * notifyFptr)( + const char *, + const void *, + ::size_t, + void *) = NULL, + void* data = NULL, + cl_int* err = NULL) + { + cl_int error; + +#if !defined(__APPLE__) || !defined(__MACOS) + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; + if (properties == NULL) { + prop[1] = (cl_context_properties)Platform::get(&error)(); + if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + return; + } + } + + properties = &prop[0]; + } +#endif + object_ = ::clCreateContextFromType( + properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != NULL) { + *err = error; + } + } + + static Context getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... + while(default_initialized_ != __DEFAULT_INITIALIZED) { + ; + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + NULL, + NULL, + NULL, + &error); + + default_error_ = error; + // Assume writes will propagate eventually... 
+ default_initialized_ = __DEFAULT_INITIALIZED; + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + Context() : detail::Wrapper<cl_type>() { } + + Context(const Context& context) : detail::Wrapper<cl_type>(context) { } + + Context(const cl_context& context) : detail::Wrapper<cl_type>(context) { } + + Context& operator = (const Context& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Context& operator = (const cl_context& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_context_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_context_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_context_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int getSupportedImageFormats( + cl_mem_flags flags, + cl_mem_object_type type, + VECTOR_CLASS<ImageFormat>* formats) const + { + cl_uint numEntries; + cl_int err = ::clGetSupportedImageFormats( + object_, + flags, + type, + 0, + NULL, + &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + ImageFormat* value = (ImageFormat*) + alloca(numEntries * sizeof(ImageFormat)); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*) value, + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(&value[0], &value[numEntries]); + return CL_SUCCESS; + } +}; + +inline Device Device::getDefault(cl_int * err) +{ + cl_int error; + Device device; + + Context context = Context::getDefault(&error); + 
detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + device = context.getInfo<CL_CONTEXT_DEVICES>()[0]; + if (err != NULL) { + *err = CL_SUCCESS; + } + } + + return device; +} + +#ifdef _WIN32 +__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) Context Context::default_; +__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) Context Context::default_; +__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; +#endif + +__GET_INFO_HELPER_WITH_RETAIN(cl::Context) + +/*! \class Event + * \brief Event interface for cl_event. + */ +class Event : public detail::Wrapper<cl_event> +{ +public: + Event() : detail::Wrapper<cl_type>() { } + + Event(const Event& event) : detail::Wrapper<cl_type>(event) { } + + Event(const cl_event& event) : detail::Wrapper<cl_type>(event) { } + + Event& operator = (const Event& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Event& operator = (const cl_event& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_event_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_event_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_event_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template <typename T> + cl_int getProfilingInfo(cl_profiling_info name, T* param) const + { + return 
detail::errHandler(detail::getInfo( + &::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_profiling_info, name>::param_type + getProfilingInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_profiling_info, name>::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int wait() const + { + return detail::errHandler( + ::clWaitForEvents(1, &object_), + __WAIT_FOR_EVENTS_ERR); + } + +#if defined(CL_VERSION_1_1) + cl_int setCallback( + cl_int type, + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetEventCallback( + object_, + type, + pfn_notify, + user_data), + __SET_EVENT_CALLBACK_ERR); + } +#endif + + static cl_int + waitForEvents(const VECTOR_CLASS<Event>& events) + { + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Event) + +#if defined(CL_VERSION_1_1) +/*! \class UserEvent + * \brief User event interface for cl_event. 
+ */ +class UserEvent : public Event +{ +public: + UserEvent( + const Context& context, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateUserEvent( + context(), + &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != NULL) { + *err = error; + } + } + + UserEvent() : Event() { } + + UserEvent(const UserEvent& event) : Event(event) { } + + UserEvent& operator = (const UserEvent& rhs) + { + if (this != &rhs) { + Event::operator=(rhs); + } + return *this; + } + + cl_int setStatus(cl_int status) + { + return detail::errHandler( + ::clSetUserEventStatus(object_,status), + __SET_USER_EVENT_STATUS_ERR); + } +}; +#endif + +inline static cl_int +WaitForEvents(const VECTOR_CLASS<Event>& events) +{ + return detail::errHandler( + ::clWaitForEvents( + (cl_uint) events.size(), (cl_event*)&events.front()), + __WAIT_FOR_EVENTS_ERR); +} + +/*! \class Memory + * \brief Memory interface for cl_mem. + */ +class Memory : public detail::Wrapper<cl_mem> +{ +public: + Memory() : detail::Wrapper<cl_type>() { } + + Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { } + + Memory(const cl_mem& memory) : detail::Wrapper<cl_type>(memory) { } + + Memory& operator = (const Memory& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Memory& operator = (const cl_mem& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_mem_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_mem_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_mem_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + +#if defined(CL_VERSION_1_1) + cl_int 
setDestructorCallback( + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void * user_data = NULL) + { + return detail::errHandler( + ::clSetMemObjectDestructorCallback( + object_, + pfn_notify, + user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } +#endif + +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Memory) + +/*! \class Buffer + * \brief Memory buffer interface. + */ +class Buffer : public Memory +{ +public: + Buffer( + const Context& context, + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); +if (!(flags & CL_MEM_ALLOC_HOST_PTR)) +#pragma omp critical (cout) +std::cout << "A: Allocating " << size << " bytes" << std::endl; + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + Buffer( + cl_mem_flags flags, + ::size_t size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); +if (!(flags & CL_MEM_ALLOC_HOST_PTR)) +#pragma omp critical (cout) +std::cout << "B: Allocating " << size << " bytes" << std::endl; + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + + Buffer() : Memory() { } + + Buffer(const Buffer& buffer) : Memory(buffer) { } + + Buffer(const cl_mem& buffer) : Memory(buffer) { } + + Buffer& operator = (const Buffer& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + Buffer& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +#if defined(CL_VERSION_1_1) + Buffer createSubBuffer( + cl_mem_flags flags, + cl_buffer_create_type buffer_create_type, + const void * buffer_create_info, + cl_int * err = NULL) + { + Buffer result; + cl_int error; + result.object_ = ::clCreateSubBuffer( + object_, + flags, + buffer_create_type, + 
buffer_create_info, + &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != NULL) { + *err = error; + } + + return result; + } +#endif +}; + +#if defined (USE_DX_INTEROP) +class BufferD3D10 : public Buffer +{ +public: + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + + BufferD3D10( + const Context& context, + cl_mem_flags flags, + ID3D10Buffer* bufobj, + cl_int * err = NULL) + { + static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferD3D10() : Buffer() { } + + BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { } + + BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + + BufferD3D10& operator = (const BufferD3D10& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + BufferD3D10& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } +}; +#endif + +/*! \class BufferGL + * \brief Memory buffer interface for GL interop. 
+ */ +class BufferGL : public Buffer +{ +public: + BufferGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLBuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferGL() : Buffer() { } + + BufferGL(const BufferGL& buffer) : Buffer(buffer) { } + + BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + + BufferGL& operator = (const BufferGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + BufferGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \class BufferRenderGL + * \brief Memory buffer interface for GL interop with renderbuffer. + */ +class BufferRenderGL : public Buffer +{ +public: + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + BufferRenderGL() : Buffer() { } + + BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { } + + BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { } + + BufferRenderGL& operator = (const BufferRenderGL& rhs) + { + if (this != &rhs) { + Buffer::operator=(rhs); + } + return *this; + } + + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + cl_int getObjectInfo( + cl_gl_object_type *type, + GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! 
\class Image + * \brief Base class interface for all images. + */ +class Image : public Memory +{ +protected: + Image() : Memory() { } + + Image(const Image& image) : Memory(image) { } + + Image(const cl_mem& image) : Memory(image) { } + + Image& operator = (const Image& rhs) + { + if (this != &rhs) { + Memory::operator=(rhs); + } + return *this; + } + + Image& operator = (const cl_mem& rhs) + { + Memory::operator=(rhs); + return *this; + } + +public: + template <typename T> + cl_int getImageInfo(cl_image_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_image_info, name>::param_type + getImageInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_image_info, name>::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +/*! \class Image2D + * \brief Image interface for 2D images. + */ +class Image2D : public Image +{ +public: + Image2D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t row_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateImage2D( + context(), flags,&format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2D() { } + + Image2D(const Image2D& image2D) : Image(image2D) { } + + Image2D(const cl_mem& image2D) : Image(image2D) { } + + Image2D& operator = (const Image2D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image2D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image2DGL + * \brief 2D image interface for GL interop. 
+ */ +class Image2DGL : public Image2D +{ +public: + Image2DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture2D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image2DGL() : Image2D() { } + + Image2DGL(const Image2DGL& image) : Image2D(image) { } + + Image2DGL(const cl_mem& image) : Image2D(image) { } + + Image2DGL& operator = (const Image2DGL& rhs) + { + if (this != &rhs) { + Image2D::operator=(rhs); + } + return *this; + } + + Image2DGL& operator = (const cl_mem& rhs) + { + Image2D::operator=(rhs); + return *this; + } +}; + +/*! \class Image3D + * \brief Image interface for 3D images. + */ +class Image3D : public Image +{ +public: + Image3D( + const Context& context, + cl_mem_flags flags, + ImageFormat format, + ::size_t width, + ::size_t height, + ::size_t depth, + ::size_t row_pitch = 0, + ::size_t slice_pitch = 0, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateImage3D( + context(), flags, &format, width, height, depth, row_pitch, + slice_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image3D() { } + + Image3D(const Image3D& image3D) : Image(image3D) { } + + Image3D(const cl_mem& image3D) : Image(image3D) { } + + Image3D& operator = (const Image3D& rhs) + { + if (this != &rhs) { + Image::operator=(rhs); + } + return *this; + } + + Image3D& operator = (const cl_mem& rhs) + { + Image::operator=(rhs); + return *this; + } +}; + +/*! \class Image2DGL + * \brief 2D image interface for GL interop. 
+ */ +class Image3DGL : public Image3D +{ +public: + Image3DGL( + const Context& context, + cl_mem_flags flags, + GLenum target, + GLint miplevel, + GLuint texobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLTexture3D( + context(), + flags, + target, + miplevel, + texobj, + &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != NULL) { + *err = error; + } + } + + Image3DGL() : Image3D() { } + + Image3DGL(const Image3DGL& image) : Image3D(image) { } + + Image3DGL(const cl_mem& image) : Image3D(image) { } + + Image3DGL& operator = (const Image3DGL& rhs) + { + if (this != &rhs) { + Image3D::operator=(rhs); + } + return *this; + } + + Image3DGL& operator = (const cl_mem& rhs) + { + Image3D::operator=(rhs); + return *this; + } +}; + +/*! \class Sampler + * \brief Sampler interface for cl_sampler. + */ +class Sampler : public detail::Wrapper<cl_sampler> +{ +public: + Sampler() { } + + Sampler( + const Context& context, + cl_bool normalized_coords, + cl_addressing_mode addressing_mode, + cl_filter_mode filter_mode, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateSampler( + context(), + normalized_coords, + addressing_mode, + filter_mode, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != NULL) { + *err = error; + } + } + + Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { } + + Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { } + + Sampler& operator = (const Sampler& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Sampler& operator = (const cl_sampler& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_sampler_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + template <cl_int name> typename + 
detail::param_traits<detail::cl_sampler_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_sampler_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Sampler) + +class Program; +class CommandQueue; +class Kernel; + +/*! \class NDRange + * \brief NDRange interface + */ +class NDRange +{ +private: + size_t<3> sizes_; + cl_uint dimensions_; + +public: + NDRange() + : dimensions_(0) + { } + + NDRange(::size_t size0) + : dimensions_(1) + { + sizes_.push_back(size0); + } + + NDRange(::size_t size0, ::size_t size1) + : dimensions_(2) + { + sizes_.push_back(size0); + sizes_.push_back(size1); + } + + NDRange(::size_t size0, ::size_t size1, ::size_t size2) + : dimensions_(3) + { + sizes_.push_back(size0); + sizes_.push_back(size1); + sizes_.push_back(size2); + } + + operator const ::size_t*() const { return (const ::size_t*) sizes_; } + ::size_t dimensions() const { return dimensions_; } +}; + +static const NDRange NullRange; + +/*! + * \struct LocalSpaceArg + * \brief Local address raper for use with Kernel::setArg + */ +struct LocalSpaceArg +{ + ::size_t size_; +}; + +namespace detail { + +template <typename T> +struct KernelArgumentHandler +{ + static ::size_t size(const T&) { return sizeof(T); } + static T* ptr(T& value) { return &value; } +}; + +template <> +struct KernelArgumentHandler<LocalSpaceArg> +{ + static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static void* ptr(LocalSpaceArg&) { return NULL; } +}; + +} +//! \endcond + +inline LocalSpaceArg +__local(::size_t size) +{ + LocalSpaceArg ret = { size }; + return ret; +} + +//class KernelFunctor; + +/*! 
\class Kernel + * \brief Kernel interface that implements cl_kernel + */ +class Kernel : public detail::Wrapper<cl_kernel> +{ +public: + inline Kernel(const Program& program, const char* name, cl_int* err = NULL); + + Kernel() { } + + Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { } + + Kernel(const cl_kernel& kernel) : detail::Wrapper<cl_type>(kernel) { } + + Kernel& operator = (const Kernel& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Kernel& operator = (const cl_kernel& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_kernel_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_kernel_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template <typename T> + cl_int getWorkGroupInfo( + const Device& device, cl_kernel_work_group_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type + getWorkGroupInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_kernel_work_group_info, name>::param_type param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template <typename T> + cl_int setArg(cl_uint index, T value) + { + return detail::errHandler( + ::clSetKernelArg( + object_, + index, + 
detail::KernelArgumentHandler<T>::size(value), + detail::KernelArgumentHandler<T>::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, ::size_t size, void* argPtr) + { + return detail::errHandler( + ::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::Kernel) + +/*! \class Program + * \brief Program interface that implements cl_program. + */ +class Program : public detail::Wrapper<cl_program> +{ +public: + typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries; + typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources; + + Program( + const STRING_CLASS& source, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const STRING_CLASS& source, + bool build, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = source.c_str(); + const ::size_t length = source.size(); + + Context context = Context::getDefault(err); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const STRING_CLASS& source, + bool build = false, + cl_int* err = NULL) + { + cl_int error; + + const char * strings = 
source.c_str(); + const ::size_t length = source.size(); + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + + error = ::clBuildProgram( + object_, + 0, + NULL, + "", + NULL, + NULL); + + detail::errHandler(error, __BUILD_PROGRAM_ERR); + } + + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + + const ::size_t n = (::size_t)sources.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const char** strings = (const char**) alloca(n * sizeof(const char*)); + + for (::size_t i = 0; i < n; ++i) { + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; + } + + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings, lengths, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != NULL) { + *err = error; + } + } + + Program( + const Context& context, + const VECTOR_CLASS<Device>& devices, + const Binaries& binaries, + VECTOR_CLASS<cl_int>* binaryStatus = NULL, + cl_int* err = NULL) + { + cl_int error; + const ::size_t n = binaries.size(); + ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); + const unsigned char** images = (const unsigned char**) alloca(n * sizeof(const void*)); + + for (::size_t i = 0; i < n; ++i) { + images[i] = (const unsigned char*)binaries[(int)i].first; + lengths[i] = binaries[(int)i].second; + } + + object_ = ::clCreateProgramWithBinary( + context(), (cl_uint) devices.size(), + (cl_device_id*)&devices.front(), + lengths, images, binaryStatus != NULL + ? 
(cl_int*) &binaryStatus->front() + : NULL, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != NULL) { + *err = error; + } + } + + Program() { } + + Program(const Program& program) : detail::Wrapper<cl_type>(program) { } + + Program(const cl_program& program) : detail::Wrapper<cl_type>(program) { } + + Program& operator = (const Program& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + Program& operator = (const cl_program& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + cl_int build( + const VECTOR_CLASS<Device>& devices, + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + (cl_device_id*)&devices.front(), + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + cl_int build( + const char* options = NULL, + void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, + void* data = NULL) const + { + return detail::errHandler( + ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data), + __BUILD_PROGRAM_ERR); + } + + template <typename T> + cl_int getInfo(cl_program_info name, T* param) const + { + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_program_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + template <typename T> + cl_int getBuildInfo( + const Device& device, cl_program_build_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetProgramBuildInfo, object_, device(), name, param), + 
__GET_PROGRAM_BUILD_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_program_build_info, name>::param_type + getBuildInfo(const Device& device, cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int createKernels(VECTOR_CLASS<Kernel>* kernels) + { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + err = ::clCreateKernelsInProgram( + object_, numKernels, (cl_kernel*) value, NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + kernels->assign(&value[0], &value[numKernels]); + return CL_SUCCESS; + } +}; + +template<> +inline VECTOR_CLASS<char *> cl::Program::getInfo<CL_PROGRAM_BINARIES>(cl_int* err) const +{ + VECTOR_CLASS< ::size_t> sizes = getInfo<CL_PROGRAM_BINARY_SIZES>(); + VECTOR_CLASS<char *> binaries; + for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) + { + char *ptr = NULL; + if (*s != 0) + ptr = new char[*s]; + binaries.push_back(ptr); + } + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (err != NULL) { + *err = result; + } + return binaries; +} + +__GET_INFO_HELPER_WITH_RETAIN(cl::Program) + +inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) +{ + cl_int error; + + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); + + if (err != NULL) { + *err = error; + } + +} + +/*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. 
+ */ +class CommandQueue : public detail::Wrapper<cl_command_queue> +{ +private: + static volatile int default_initialized_; + static CommandQueue default_; + static volatile cl_int default_error_; +public: + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0]; + + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + } + + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + static CommandQueue getDefault(cl_int * err = NULL) + { + int state = detail::compare_exchange( + &default_initialized_, + __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); + + if (state & __DEFAULT_INITIALIZED) { + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + if (state & __DEFAULT_BEING_INITIALIZED) { + // Assume writes will propagate eventually... 
+ while(default_initialized_ != __DEFAULT_INITIALIZED) { + ; + } + + if (err != NULL) { + *err = default_error_; + } + return default_; + } + + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0]; + + + default_ = CommandQueue(context, device, 0, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } + } + + default_error_ = error; + // Assume writes will propagate eventually... + default_initialized_ = __DEFAULT_INITIALIZED; + if (err != NULL) { + *err = default_error_; + } + return default_; + + } + + CommandQueue() { } + + CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { } + + CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { } + + CommandQueue& operator = (const CommandQueue& rhs) + { + if (this != &rhs) { + detail::Wrapper<cl_type>::operator=(rhs); + } + return *this; + } + + CommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper<cl_type>::operator=(rhs); + return *this; + } + + template <typename T> + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template <cl_int name> typename + detail::param_traits<detail::cl_command_queue_info, name>::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } + + cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, 
+ Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, buffer(), blocking, offset, size, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, src(), dst(), src_offset, dst_offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined(CL_VERSION_1_1) + cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + + cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + (const ::size_t *)buffer_offset, + (const ::size_t *)host_offset, + (const ::size_t *)region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + (const ::size_t *)src_origin, + (const ::size_t *)dst_origin, + (const ::size_t *)region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + + cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, image(), blocking, (const ::size_t *) origin, + (const ::size_t *) region, row_pitch, slice_pitch, ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *)dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, src(), dst(), (const ::size_t *) src_origin, + (const ::size_t *) region, dst_offset, + (events != NULL) ? 
(cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, src(), dst(), src_offset, + (const ::size_t *) dst_origin, (const ::size_t *) region, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + ::size_t offset, + ::size_t size, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapBuffer( + object_, buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? 
(cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + void* enqueueMapImage( + const Image& buffer, + cl_bool blocking, + cl_map_flags flags, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t * row_pitch, + ::size_t * slice_pitch, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL, + cl_int* err = NULL) const + { + cl_int error; + void * result = ::clEnqueueMapImage( + object_, buffer(), blocking, flags, + (const ::size_t *) origin, (const ::size_t *) region, + row_pitch, slice_pitch, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != NULL) { + *err = error; + } + return result; + } + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNDRangeKernel( + const Kernel& kernel, + const NDRange& offset, + const NDRange& global, + const NDRange& local = NullRange, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, kernel(), (cl_uint) global.dimensions(), + offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, + (const ::size_t*) global, + local.dimensions() != 0 ? 
(const ::size_t*) local : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueTask( + const Kernel& kernel, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, kernel(), + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_TASK_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueNativeKernel( + void (*userFptr)(void *), + std::pair<void*, ::size_t> args, + const VECTOR_CLASS<Memory>* mem_objects = NULL, + const VECTOR_CLASS<const void*>* mem_locs = NULL, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) + ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) + : NULL; + + if (mems != NULL) { + for (unsigned int i = 0; i < mem_objects->size(); i++) { + mems[i] = ((*mem_objects)[i])(); + } + } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, userFptr, args.first, args.second, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + mems, + (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_NATIVE_KERNEL); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueMarker(Event* event = NULL) const + { + return detail::errHandler( + ::clEnqueueMarker(object_, (cl_event*) event), + __ENQUEUE_MARKER_ERR); + } + + cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const + { + return detail::errHandler( + ::clEnqueueWaitForEvents( + object_, + (cl_uint) events.size(), + (const cl_event*) &events.front()), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } + + cl_int enqueueAcquireGLObjects( + const VECTOR_CLASS<Memory>* mem_objects = NULL, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects( + const VECTOR_CLASS<Memory>* mem_objects = NULL, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? 
&tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + +#if defined (USE_DX_INTEROP) +/*! Function-pointer types for the D3D10 acquire/release extension entry points used by the two methods below. */ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); +typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, + const cl_mem* mem_objects, cl_uint num_events_in_wait_list, + const cl_event* event_wait_list, cl_event* event); + + /*! \brief Calls the clEnqueueAcquireD3D10ObjectsKHR extension through a function-local static pointer (set up by __INIT_CL_EXT_FCN_PTR, which is defined outside this view; presumably it resolves the entry point on first use). The object list and wait list contribute a data pointer only when non-NULL and non-empty. *event is assigned only on CL_SUCCESS. Returns the status after detail::errHandler. */ cl_int enqueueAcquireD3D10Objects( + const VECTOR_CLASS<Memory>* mem_objects = NULL, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, /* same empty-vector guard as the non-D3D10 methods */ + (event != NULL) ? &tmp : NULL), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /*! \brief Calls the clEnqueueReleaseD3D10ObjectsKHR extension through a function-local static pointer. The object list contributes a data pointer only when non-NULL and non-empty. */ cl_int enqueueReleaseD3D10Objects( + const VECTOR_CLASS<Memory>* mem_objects = NULL, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) const + { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; + __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, + (mem_objects != NULL && mem_objects->size() > 0) ? 
(const cl_mem *) &mem_objects->front(): NULL, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif + + cl_int enqueueBarrier() const + { + return detail::errHandler( + ::clEnqueueBarrier(object_), + __ENQUEUE_BARRIER_ERR); + } + + cl_int flush() const + { + return detail::errHandler(::clFlush(object_), __FLUSH_ERR); + } + + cl_int finish() const + { + return detail::errHandler(::clFinish(object_), __FINISH_ERR); + } +}; + +__GET_INFO_HELPER_WITH_RETAIN(cl::CommandQueue) + +#ifdef _WIN32 +__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__declspec(selectany) CommandQueue CommandQueue::default_; +__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#else +__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; +__attribute__((weak)) CommandQueue CommandQueue::default_; +__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; +#endif + +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + ::size_t offset, + ::size_t size, + const void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBuffer(buffer, blocking, 
offset, size, ptr, events, event); +} + +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + ::size_t src_offset, + ::size_t dst_offset, + ::size_t size, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +#if defined(CL_VERSION_1_1) +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t /*buffer_slice_pitch*/, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + host_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const size_t<3>& buffer_offset, + const size_t<3>& host_offset, + const size_t<3>& region, + ::size_t buffer_row_pitch, + ::size_t buffer_slice_pitch, + ::size_t host_row_pitch, + ::size_t host_slice_pitch, + void *ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} + +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const 
Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + ::size_t src_row_pitch, + ::size_t src_slice_pitch, + ::size_t dst_row_pitch, + ::size_t dst_slice_pitch, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif + +/*! \brief Free-function form of CommandQueue::enqueueReadImage. Obtains the queue from CommandQueue::getDefault(&error), returns that error if it is not CL_SUCCESS, otherwise forwards every argument unchanged and returns the member function's result. */ inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +/*! \brief Free-function form of CommandQueue::enqueueWriteImage. Obtains the queue from CommandQueue::getDefault(&error), returns that error if it is not CL_SUCCESS, otherwise forwards every argument unchanged and returns the member function's result. */ inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const size_t<3>& origin, + const size_t<3>& region, + ::size_t row_pitch, + ::size_t slice_pitch, + void* ptr, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} + +/*! \brief Free-function form of CommandQueue::enqueueCopyImage. Obtains the queue from CommandQueue::getDefault(&error) and returns that error if it is not CL_SUCCESS. */ inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const size_t<3>& src_origin, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + 
return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} + +/*! \brief Free-function form of CommandQueue::enqueueCopyImageToBuffer. Obtains the queue from CommandQueue::getDefault(&error), returns that error if it is not CL_SUCCESS, otherwise forwards every argument unchanged and returns the member function's result. */ inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const size_t<3>& src_origin, + const size_t<3>& region, + ::size_t dst_offset, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} + +/*! \brief Free-function form of CommandQueue::enqueueCopyBufferToImage. Obtains the queue from CommandQueue::getDefault(&error), returns that error if it is not CL_SUCCESS, otherwise forwards every argument unchanged and returns the member function's result. */ inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + ::size_t src_offset, + const size_t<3>& dst_origin, + const size_t<3>& region, + const VECTOR_CLASS<Event>* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} + + +/*! \brief Calls flush() on the queue from CommandQueue::getDefault(&error); returns that error instead if it is not CL_SUCCESS. */ inline cl_int flush(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + return queue.flush(); +} + +/*! \brief Calls finish() on the queue from CommandQueue::getDefault(&error); returns that error instead if it is not CL_SUCCESS. */ inline cl_int finish(void) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; + } + + + return queue.finish(); +} + + +// Kernel Functor support +// New interface as of September 2011 +// Requires the C++11 std::tr1::function (note do not support TR1) +// Visual Studio 2010 and GCC 4.2 + +#if (defined(_WIN32) && (_MSC_VER >= 1600)) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) +/*! \brief Bundle of a CommandQueue plus offset, global and local NDRange values. Constructors without a queue argument use CommandQueue::getDefault(); an omitted offset or local range is set to NullRange. */ struct EnqueueArgs +{ + CommandQueue queue_; + NDRange offset_; + NDRange global_; + NDRange local_; + + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + EnqueueArgs(NDRange global, 
NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + /*! Default queue with explicit offset, global and local ranges. */ EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { + + } + + /*! Explicit queue; offset and local are set to NullRange. */ EnqueueArgs(CommandQueue queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { + + } + + /*! Explicit queue with global and local ranges; offset is set to NullRange. */ EnqueueArgs(CommandQueue queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) + { + + } + + /*! Explicit queue with offset, global and local ranges. */ EnqueueArgs(CommandQueue queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { + + } +}; + +namespace detail { + +/*! Empty tag type; the SetArg specialisation below does nothing for it. */ class NullType {}; + +/*! \brief Primary template: set() calls kernel.setArg(index, arg) on its by-value Kernel parameter. */ template<int index, typename T0> +struct SetArg +{ + static void set (Kernel kernel, T0 arg) + { + kernel.setArg(index, arg); + } +}; + + +/*! \brief Specialisation for NullType: set() has an empty body, so no kernel argument is set for that index. */ template<int index> +struct SetArg<index, NullType> +{ + static void set (Kernel kernel, NullType) + { + } +}; + +/*! \brief Primary template whose move() has an empty body. NOTE(review): looks like a customisation point meant to be specialised elsewhere; no specialisation is visible here. */ template<typename T> +struct MoveToDevice +{ + static void move(CommandQueue, T) + { + } +}; + + +/*! \brief Primary template whose move() has an empty body; counterpart of MoveToDevice. */ template<typename T> +struct MoveFromDevice +{ + static void move(CommandQueue, T) + { + } +}; + + +template < + typename T0, typename T1, typename T2, typename T3, + typename T4, typename T5, typename T6, typename T7, + typename T8, typename T9, typename T10, typename T11, + typename T12, typename T13, typename T14, typename T15, + typename T16, typename T17, typename T18, typename T19, + typename T20, typename T21, typename T22, typename T23, + typename T24, typename T25, typename T26, typename T27, + typename T28, typename T29, typename T30, typename T31, + typename T32, typename T33, typename T34, typename T35, + typename T36, typename T37, typename T38, typename T39, + typename T40, typename T41, typename T42, typename T43, + typename T44, typename T45, typename T46, typename T47, + typename T48, typename T49, typename T50, 
typename T51, + typename T52, typename T53, typename T54, typename T55, + typename T56, typename T57, typename T58, typename T59, + typename T60, typename T61, typename T62, typename T63, + typename T64, typename T65, typename T66, typename T67, + typename T68, typename T69, typename T70, typename T71, + typename T72, typename T73, typename T74, typename T75, + typename T76, typename T77, typename T78, typename T79, + typename T80, typename T81, typename T82, typename T83, + typename T84, typename T85, typename T86, typename T87, + typename T88, typename T89, typename T90, typename T91, + typename T92, typename T93, typename T94, typename T95, + typename T96, typename T97, typename T98, typename T99, + typename T100, typename T101, typename T102, typename T103, + typename T104, typename T105, typename T106, typename T107, + typename T108, typename T109, typename T110, typename T111, + typename T112, typename T113, typename T114, typename T115, + typename T116, typename T117, typename T118, typename T119, + typename T120, typename T121, typename T122, typename T123, + typename T124, typename T125, typename T126, typename T127> +class KernelFunctorGlobal +{ +private: + Kernel kernel_; + +public: + KernelFunctorGlobal( + Kernel kernel, + cl_int * err = NULL) : + kernel_(kernel) + {} + + KernelFunctorGlobal( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} + + Event operator() ( + const EnqueueArgs& args, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 
t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType(), + T32 t32 = NullType(), + T33 t33 = NullType(), + T34 t34 = NullType(), + T35 t35 = NullType(), + T36 t36 = NullType(), + T37 t37 = NullType(), + T38 t38 = NullType(), + T39 t39 = NullType(), + T40 t40 = NullType(), + T41 t41 = NullType(), + T42 t42 = NullType(), + T43 t43 = NullType(), + T44 t44 = NullType(), + T45 t45 = NullType(), + T46 t46 = NullType(), + T47 t47 = NullType(), + T48 t48 = NullType(), + T49 t49 = NullType(), + T50 t50 = NullType(), + T51 t51 = NullType(), + T52 t52 = NullType(), + T53 t53 = NullType(), + T54 t54 = NullType(), + T55 t55 = NullType(), + T56 t56 = NullType(), + T57 t57 = NullType(), + T58 t58 = NullType(), + T59 t59 = NullType(), + T60 t60 = NullType(), + T61 t61 = NullType(), + T62 t62 = NullType(), + T63 t63 = NullType(), + T64 t64 = NullType(), + T65 t65 = NullType(), + T66 t66 = NullType(), + T67 t67 = NullType(), + T68 t68 = NullType(), + T69 t69 = NullType(), + T70 t70 = NullType(), + T71 t71 = NullType(), + T72 t72 = NullType(), + T73 t73 = NullType(), + T74 t74 = NullType(), + T75 t75 = NullType(), + T76 t76 = NullType(), + T77 t77 = NullType(), + T78 t78 = NullType(), + T79 t79 = NullType(), + T80 t80 = NullType(), + T81 t81 = NullType(), + T82 t82 = NullType(), + T83 t83 = NullType(), + T84 t84 = NullType(), + T85 t85 = NullType(), + T86 t86 = NullType(), + T87 t87 = NullType(), + T88 t88 = NullType(), + T89 t89 = NullType(), + T90 t90 = NullType(), + T91 t91 = NullType(), + T92 t92 = NullType(), + T93 t93 = NullType(), + T94 t94 = NullType(), + T95 t95 = NullType(), + T96 t96 = NullType(), + T97 t97 = NullType(), + T98 t98 = NullType(), + T99 t99 = NullType(), + T100 t100 = NullType(), + T101 t101 = NullType(), + T102 t102 = NullType(), + T103 t103 = NullType(), + T104 t104 = NullType(), + 
T105 t105 = NullType(), + T106 t106 = NullType(), + T107 t107 = NullType(), + T108 t108 = NullType(), + T109 t109 = NullType(), + T110 t110 = NullType(), + T111 t111 = NullType(), + T112 t112 = NullType(), + T113 t113 = NullType(), + T114 t114 = NullType(), + T115 t115 = NullType(), + T116 t116 = NullType(), + T117 t117 = NullType(), + T118 t118 = NullType(), + T119 t119 = NullType(), + T120 t120 = NullType(), + T121 t121 = NullType(), + T122 t122 = NullType(), + T123 t123 = NullType(), + T124 t124 = NullType(), + T125 t125 = NullType(), + T126 t126 = NullType(), + T127 t127 = NullType()) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + SetArg<32, T32>::set(kernel_, t32); + SetArg<33, T33>::set(kernel_, t33); + SetArg<34, T34>::set(kernel_, t34); + SetArg<35, T35>::set(kernel_, t35); + SetArg<36, T36>::set(kernel_, t36); + 
SetArg<37, T37>::set(kernel_, t37); + SetArg<38, T38>::set(kernel_, t38); + SetArg<39, T39>::set(kernel_, t39); + SetArg<40, T40>::set(kernel_, t40); + SetArg<41, T41>::set(kernel_, t41); + SetArg<42, T42>::set(kernel_, t42); + SetArg<43, T43>::set(kernel_, t43); + SetArg<44, T44>::set(kernel_, t44); + SetArg<45, T45>::set(kernel_, t45); + SetArg<46, T46>::set(kernel_, t46); + SetArg<47, T47>::set(kernel_, t47); + SetArg<48, T48>::set(kernel_, t48); + SetArg<49, T49>::set(kernel_, t49); + SetArg<50, T50>::set(kernel_, t50); + SetArg<51, T51>::set(kernel_, t51); + SetArg<52, T52>::set(kernel_, t52); + SetArg<53, T53>::set(kernel_, t53); + SetArg<54, T54>::set(kernel_, t54); + SetArg<55, T55>::set(kernel_, t55); + SetArg<56, T56>::set(kernel_, t56); + SetArg<57, T57>::set(kernel_, t57); + SetArg<58, T58>::set(kernel_, t58); + SetArg<59, T59>::set(kernel_, t59); + SetArg<60, T60>::set(kernel_, t60); + SetArg<61, T61>::set(kernel_, t61); + SetArg<62, T62>::set(kernel_, t62); + SetArg<63, T63>::set(kernel_, t63); + SetArg<64, T64>::set(kernel_, t64); + SetArg<65, T65>::set(kernel_, t65); + SetArg<66, T66>::set(kernel_, t66); + SetArg<67, T67>::set(kernel_, t67); + SetArg<68, T68>::set(kernel_, t68); + SetArg<69, T69>::set(kernel_, t69); + SetArg<70, T70>::set(kernel_, t70); + SetArg<71, T71>::set(kernel_, t71); + SetArg<72, T72>::set(kernel_, t72); + SetArg<73, T73>::set(kernel_, t73); + SetArg<74, T74>::set(kernel_, t74); + SetArg<75, T75>::set(kernel_, t75); + SetArg<76, T76>::set(kernel_, t76); + SetArg<77, T77>::set(kernel_, t77); + SetArg<78, T78>::set(kernel_, t78); + SetArg<79, T79>::set(kernel_, t79); + SetArg<80, T80>::set(kernel_, t80); + SetArg<81, T81>::set(kernel_, t81); + SetArg<82, T82>::set(kernel_, t82); + SetArg<83, T83>::set(kernel_, t83); + SetArg<84, T84>::set(kernel_, t84); + SetArg<85, T85>::set(kernel_, t85); + SetArg<86, T86>::set(kernel_, t86); + SetArg<87, T87>::set(kernel_, t87); + SetArg<88, T88>::set(kernel_, t88); + SetArg<89, 
T89>::set(kernel_, t89); + SetArg<90, T90>::set(kernel_, t90); + SetArg<91, T91>::set(kernel_, t91); + SetArg<92, T92>::set(kernel_, t92); + SetArg<93, T93>::set(kernel_, t93); + SetArg<94, T94>::set(kernel_, t94); + SetArg<95, T95>::set(kernel_, t95); + SetArg<96, T96>::set(kernel_, t96); + SetArg<97, T97>::set(kernel_, t97); + SetArg<98, T98>::set(kernel_, t98); + SetArg<99, T99>::set(kernel_, t99); + SetArg<100, T100>::set(kernel_, t100); + SetArg<101, T101>::set(kernel_, t101); + SetArg<102, T102>::set(kernel_, t102); + SetArg<103, T103>::set(kernel_, t103); + SetArg<104, T104>::set(kernel_, t104); + SetArg<105, T105>::set(kernel_, t105); + SetArg<106, T106>::set(kernel_, t106); + SetArg<107, T107>::set(kernel_, t107); + SetArg<108, T108>::set(kernel_, t108); + SetArg<109, T109>::set(kernel_, t109); + SetArg<110, T110>::set(kernel_, t110); + SetArg<111, T111>::set(kernel_, t111); + SetArg<112, T112>::set(kernel_, t112); + SetArg<113, T113>::set(kernel_, t113); + SetArg<114, T114>::set(kernel_, t114); + SetArg<115, T115>::set(kernel_, t115); + SetArg<116, T116>::set(kernel_, t116); + SetArg<117, T117>::set(kernel_, t117); + SetArg<118, T118>::set(kernel_, t118); + SetArg<119, T119>::set(kernel_, t119); + SetArg<120, T120>::set(kernel_, t120); + SetArg<121, T121>::set(kernel_, t121); + SetArg<122, T122>::set(kernel_, t122); + SetArg<123, T123>::set(kernel_, t123); + SetArg<124, T124>::set(kernel_, t124); + SetArg<125, T125>::set(kernel_, t125); + SetArg<126, T126>::set(kernel_, t126); + SetArg<127, T127>::set(kernel_, t127); + + // Move any arguments that need moving + MoveToDevice<T0>::move(args.queue_, t0); + MoveToDevice<T1>::move(args.queue_, t1); + MoveToDevice<T2>::move(args.queue_, t2); + MoveToDevice<T3>::move(args.queue_, t3); + MoveToDevice<T4>::move(args.queue_, t4); + MoveToDevice<T5>::move(args.queue_, t5); + MoveToDevice<T6>::move(args.queue_, t6); + MoveToDevice<T7>::move(args.queue_, t7); + MoveToDevice<T8>::move(args.queue_, t8); + 
MoveToDevice<T9>::move(args.queue_, t9); + MoveToDevice<T10>::move(args.queue_, t10); + MoveToDevice<T11>::move(args.queue_, t11); + MoveToDevice<T12>::move(args.queue_, t12); + MoveToDevice<T13>::move(args.queue_, t13); + MoveToDevice<T14>::move(args.queue_, t14); + MoveToDevice<T15>::move(args.queue_, t15); + MoveToDevice<T16>::move(args.queue_, t16); + MoveToDevice<T17>::move(args.queue_, t17); + MoveToDevice<T18>::move(args.queue_, t18); + MoveToDevice<T19>::move(args.queue_, t19); + MoveToDevice<T20>::move(args.queue_, t20); + MoveToDevice<T21>::move(args.queue_, t21); + MoveToDevice<T22>::move(args.queue_, t22); + MoveToDevice<T23>::move(args.queue_, t23); + MoveToDevice<T24>::move(args.queue_, t24); + MoveToDevice<T25>::move(args.queue_, t25); + MoveToDevice<T26>::move(args.queue_, t26); + MoveToDevice<T27>::move(args.queue_, t27); + MoveToDevice<T28>::move(args.queue_, t28); + MoveToDevice<T29>::move(args.queue_, t29); + MoveToDevice<T30>::move(args.queue_, t30); + MoveToDevice<T31>::move(args.queue_, t31); + MoveToDevice<T32>::move(args.queue_, t32); + MoveToDevice<T33>::move(args.queue_, t33); + MoveToDevice<T34>::move(args.queue_, t34); + MoveToDevice<T35>::move(args.queue_, t35); + MoveToDevice<T36>::move(args.queue_, t36); + MoveToDevice<T37>::move(args.queue_, t37); + MoveToDevice<T38>::move(args.queue_, t38); + MoveToDevice<T39>::move(args.queue_, t39); + MoveToDevice<T40>::move(args.queue_, t40); + MoveToDevice<T41>::move(args.queue_, t41); + MoveToDevice<T42>::move(args.queue_, t42); + MoveToDevice<T43>::move(args.queue_, t43); + MoveToDevice<T44>::move(args.queue_, t44); + MoveToDevice<T45>::move(args.queue_, t45); + MoveToDevice<T46>::move(args.queue_, t46); + MoveToDevice<T47>::move(args.queue_, t47); + MoveToDevice<T48>::move(args.queue_, t48); + MoveToDevice<T49>::move(args.queue_, t49); + MoveToDevice<T50>::move(args.queue_, t50); + MoveToDevice<T51>::move(args.queue_, t51); + MoveToDevice<T52>::move(args.queue_, t52); + 
MoveToDevice<T53>::move(args.queue_, t53); + MoveToDevice<T54>::move(args.queue_, t54); + MoveToDevice<T55>::move(args.queue_, t55); + MoveToDevice<T56>::move(args.queue_, t56); + MoveToDevice<T57>::move(args.queue_, t57); + MoveToDevice<T58>::move(args.queue_, t58); + MoveToDevice<T59>::move(args.queue_, t59); + MoveToDevice<T60>::move(args.queue_, t60); + MoveToDevice<T61>::move(args.queue_, t61); + MoveToDevice<T62>::move(args.queue_, t62); + MoveToDevice<T63>::move(args.queue_, t63); + MoveToDevice<T64>::move(args.queue_, t64); + MoveToDevice<T65>::move(args.queue_, t65); + MoveToDevice<T66>::move(args.queue_, t66); + MoveToDevice<T67>::move(args.queue_, t67); + MoveToDevice<T68>::move(args.queue_, t68); + MoveToDevice<T69>::move(args.queue_, t69); + MoveToDevice<T70>::move(args.queue_, t70); + MoveToDevice<T71>::move(args.queue_, t71); + MoveToDevice<T72>::move(args.queue_, t72); + MoveToDevice<T73>::move(args.queue_, t73); + MoveToDevice<T74>::move(args.queue_, t74); + MoveToDevice<T75>::move(args.queue_, t75); + MoveToDevice<T76>::move(args.queue_, t76); + MoveToDevice<T77>::move(args.queue_, t77); + MoveToDevice<T78>::move(args.queue_, t78); + MoveToDevice<T79>::move(args.queue_, t79); + MoveToDevice<T80>::move(args.queue_, t80); + MoveToDevice<T81>::move(args.queue_, t81); + MoveToDevice<T82>::move(args.queue_, t82); + MoveToDevice<T83>::move(args.queue_, t83); + MoveToDevice<T84>::move(args.queue_, t84); + MoveToDevice<T85>::move(args.queue_, t85); + MoveToDevice<T86>::move(args.queue_, t86); + MoveToDevice<T87>::move(args.queue_, t87); + MoveToDevice<T88>::move(args.queue_, t88); + MoveToDevice<T89>::move(args.queue_, t89); + MoveToDevice<T90>::move(args.queue_, t90); + MoveToDevice<T91>::move(args.queue_, t91); + MoveToDevice<T92>::move(args.queue_, t92); + MoveToDevice<T93>::move(args.queue_, t93); + MoveToDevice<T94>::move(args.queue_, t94); + MoveToDevice<T95>::move(args.queue_, t95); + MoveToDevice<T96>::move(args.queue_, t96); + 
MoveToDevice<T97>::move(args.queue_, t97); + MoveToDevice<T98>::move(args.queue_, t98); + MoveToDevice<T99>::move(args.queue_, t99); + MoveToDevice<T100>::move(args.queue_, t100); + MoveToDevice<T101>::move(args.queue_, t101); + MoveToDevice<T102>::move(args.queue_, t102); + MoveToDevice<T103>::move(args.queue_, t103); + MoveToDevice<T104>::move(args.queue_, t104); + MoveToDevice<T105>::move(args.queue_, t105); + MoveToDevice<T106>::move(args.queue_, t106); + MoveToDevice<T107>::move(args.queue_, t107); + MoveToDevice<T108>::move(args.queue_, t108); + MoveToDevice<T109>::move(args.queue_, t109); + MoveToDevice<T110>::move(args.queue_, t110); + MoveToDevice<T111>::move(args.queue_, t111); + MoveToDevice<T112>::move(args.queue_, t112); + MoveToDevice<T113>::move(args.queue_, t113); + MoveToDevice<T114>::move(args.queue_, t114); + MoveToDevice<T115>::move(args.queue_, t115); + MoveToDevice<T116>::move(args.queue_, t116); + MoveToDevice<T117>::move(args.queue_, t117); + MoveToDevice<T118>::move(args.queue_, t118); + MoveToDevice<T119>::move(args.queue_, t119); + MoveToDevice<T120>::move(args.queue_, t120); + MoveToDevice<T121>::move(args.queue_, t121); + MoveToDevice<T122>::move(args.queue_, t122); + MoveToDevice<T123>::move(args.queue_, t123); + MoveToDevice<T124>::move(args.queue_, t124); + MoveToDevice<T125>::move(args.queue_, t125); + MoveToDevice<T126>::move(args.queue_, t126); + MoveToDevice<T127>::move(args.queue_, t127); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + NULL, + &event); + + // Move any arguments that need moving + MoveFromDevice<T0>::move(args.queue_, t0); + MoveFromDevice<T1>::move(args.queue_, t1); + MoveFromDevice<T2>::move(args.queue_, t2); + MoveFromDevice<T3>::move(args.queue_, t3); + MoveFromDevice<T4>::move(args.queue_, t4); + MoveFromDevice<T5>::move(args.queue_, t5); + MoveFromDevice<T6>::move(args.queue_, t6); + MoveFromDevice<T7>::move(args.queue_, t7); + 
MoveFromDevice<T8>::move(args.queue_, t8); + MoveFromDevice<T9>::move(args.queue_, t9); + MoveFromDevice<T10>::move(args.queue_, t10); + MoveFromDevice<T11>::move(args.queue_, t11); + MoveFromDevice<T12>::move(args.queue_, t12); + MoveFromDevice<T13>::move(args.queue_, t13); + MoveFromDevice<T14>::move(args.queue_, t14); + MoveFromDevice<T15>::move(args.queue_, t15); + MoveFromDevice<T16>::move(args.queue_, t16); + MoveFromDevice<T17>::move(args.queue_, t17); + MoveFromDevice<T18>::move(args.queue_, t18); + MoveFromDevice<T19>::move(args.queue_, t19); + MoveFromDevice<T20>::move(args.queue_, t20); + MoveFromDevice<T21>::move(args.queue_, t21); + MoveFromDevice<T22>::move(args.queue_, t22); + MoveFromDevice<T23>::move(args.queue_, t23); + MoveFromDevice<T24>::move(args.queue_, t24); + MoveFromDevice<T25>::move(args.queue_, t25); + MoveFromDevice<T26>::move(args.queue_, t26); + MoveFromDevice<T27>::move(args.queue_, t27); + MoveFromDevice<T28>::move(args.queue_, t28); + MoveFromDevice<T29>::move(args.queue_, t29); + MoveFromDevice<T30>::move(args.queue_, t30); + MoveFromDevice<T31>::move(args.queue_, t31); + MoveFromDevice<T32>::move(args.queue_, t32); + MoveFromDevice<T33>::move(args.queue_, t33); + MoveFromDevice<T34>::move(args.queue_, t34); + MoveFromDevice<T35>::move(args.queue_, t35); + MoveFromDevice<T36>::move(args.queue_, t36); + MoveFromDevice<T37>::move(args.queue_, t37); + MoveFromDevice<T38>::move(args.queue_, t38); + MoveFromDevice<T39>::move(args.queue_, t39); + MoveFromDevice<T40>::move(args.queue_, t40); + MoveFromDevice<T41>::move(args.queue_, t41); + MoveFromDevice<T42>::move(args.queue_, t42); + MoveFromDevice<T43>::move(args.queue_, t43); + MoveFromDevice<T44>::move(args.queue_, t44); + MoveFromDevice<T45>::move(args.queue_, t45); + MoveFromDevice<T46>::move(args.queue_, t46); + MoveFromDevice<T47>::move(args.queue_, t47); + MoveFromDevice<T48>::move(args.queue_, t48); + MoveFromDevice<T49>::move(args.queue_, t49); + 
MoveFromDevice<T50>::move(args.queue_, t50); + MoveFromDevice<T51>::move(args.queue_, t51); + MoveFromDevice<T52>::move(args.queue_, t52); + MoveFromDevice<T53>::move(args.queue_, t53); + MoveFromDevice<T54>::move(args.queue_, t54); + MoveFromDevice<T55>::move(args.queue_, t55); + MoveFromDevice<T56>::move(args.queue_, t56); + MoveFromDevice<T57>::move(args.queue_, t57); + MoveFromDevice<T58>::move(args.queue_, t58); + MoveFromDevice<T59>::move(args.queue_, t59); + MoveFromDevice<T60>::move(args.queue_, t60); + MoveFromDevice<T61>::move(args.queue_, t61); + MoveFromDevice<T62>::move(args.queue_, t62); + MoveFromDevice<T63>::move(args.queue_, t63); + MoveFromDevice<T64>::move(args.queue_, t64); + MoveFromDevice<T65>::move(args.queue_, t65); + MoveFromDevice<T66>::move(args.queue_, t66); + MoveFromDevice<T67>::move(args.queue_, t67); + MoveFromDevice<T68>::move(args.queue_, t68); + MoveFromDevice<T69>::move(args.queue_, t69); + MoveFromDevice<T70>::move(args.queue_, t70); + MoveFromDevice<T71>::move(args.queue_, t71); + MoveFromDevice<T72>::move(args.queue_, t72); + MoveFromDevice<T73>::move(args.queue_, t73); + MoveFromDevice<T74>::move(args.queue_, t74); + MoveFromDevice<T75>::move(args.queue_, t75); + MoveFromDevice<T76>::move(args.queue_, t76); + MoveFromDevice<T77>::move(args.queue_, t77); + MoveFromDevice<T78>::move(args.queue_, t78); + MoveFromDevice<T79>::move(args.queue_, t79); + MoveFromDevice<T80>::move(args.queue_, t80); + MoveFromDevice<T81>::move(args.queue_, t81); + MoveFromDevice<T82>::move(args.queue_, t82); + MoveFromDevice<T83>::move(args.queue_, t83); + MoveFromDevice<T84>::move(args.queue_, t84); + MoveFromDevice<T85>::move(args.queue_, t85); + MoveFromDevice<T86>::move(args.queue_, t86); + MoveFromDevice<T87>::move(args.queue_, t87); + MoveFromDevice<T88>::move(args.queue_, t88); + MoveFromDevice<T89>::move(args.queue_, t89); + MoveFromDevice<T90>::move(args.queue_, t90); + MoveFromDevice<T91>::move(args.queue_, t91); + 
MoveFromDevice<T92>::move(args.queue_, t92); + MoveFromDevice<T93>::move(args.queue_, t93); + MoveFromDevice<T94>::move(args.queue_, t94); + MoveFromDevice<T95>::move(args.queue_, t95); + MoveFromDevice<T96>::move(args.queue_, t96); + MoveFromDevice<T97>::move(args.queue_, t97); + MoveFromDevice<T98>::move(args.queue_, t98); + MoveFromDevice<T99>::move(args.queue_, t99); + MoveFromDevice<T100>::move(args.queue_, t100); + MoveFromDevice<T101>::move(args.queue_, t101); + MoveFromDevice<T102>::move(args.queue_, t102); + MoveFromDevice<T103>::move(args.queue_, t103); + MoveFromDevice<T104>::move(args.queue_, t104); + MoveFromDevice<T105>::move(args.queue_, t105); + MoveFromDevice<T106>::move(args.queue_, t106); + MoveFromDevice<T107>::move(args.queue_, t107); + MoveFromDevice<T108>::move(args.queue_, t108); + MoveFromDevice<T109>::move(args.queue_, t109); + MoveFromDevice<T110>::move(args.queue_, t110); + MoveFromDevice<T111>::move(args.queue_, t111); + MoveFromDevice<T112>::move(args.queue_, t112); + MoveFromDevice<T113>::move(args.queue_, t113); + MoveFromDevice<T114>::move(args.queue_, t114); + MoveFromDevice<T115>::move(args.queue_, t115); + MoveFromDevice<T116>::move(args.queue_, t116); + MoveFromDevice<T117>::move(args.queue_, t117); + MoveFromDevice<T118>::move(args.queue_, t118); + MoveFromDevice<T119>::move(args.queue_, t119); + MoveFromDevice<T120>::move(args.queue_, t120); + MoveFromDevice<T121>::move(args.queue_, t121); + MoveFromDevice<T122>::move(args.queue_, t122); + MoveFromDevice<T123>::move(args.queue_, t123); + MoveFromDevice<T124>::move(args.queue_, t124); + MoveFromDevice<T125>::move(args.queue_, t125); + MoveFromDevice<T126>::move(args.queue_, t126); + MoveFromDevice<T127>::move(args.queue_, t127); + + return event; + } + + Event operator() ( + const EnqueueArgs& args, + const Event& waitEvent, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = 
NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType(), + T32 t32 = NullType(), + T33 t33 = NullType(), + T34 t34 = NullType(), + T35 t35 = NullType(), + T36 t36 = NullType(), + T37 t37 = NullType(), + T38 t38 = NullType(), + T39 t39 = NullType(), + T40 t40 = NullType(), + T41 t41 = NullType(), + T42 t42 = NullType(), + T43 t43 = NullType(), + T44 t44 = NullType(), + T45 t45 = NullType(), + T46 t46 = NullType(), + T47 t47 = NullType(), + T48 t48 = NullType(), + T49 t49 = NullType(), + T50 t50 = NullType(), + T51 t51 = NullType(), + T52 t52 = NullType(), + T53 t53 = NullType(), + T54 t54 = NullType(), + T55 t55 = NullType(), + T56 t56 = NullType(), + T57 t57 = NullType(), + T58 t58 = NullType(), + T59 t59 = NullType(), + T60 t60 = NullType(), + T61 t61 = NullType(), + T62 t62 = NullType(), + T63 t63 = NullType(), + T64 t64 = NullType(), + T65 t65 = NullType(), + T66 t66 = NullType(), + T67 t67 = NullType(), + T68 t68 = NullType(), + T69 t69 = NullType(), + T70 t70 = NullType(), + T71 t71 = NullType(), + T72 t72 = NullType(), + T73 t73 = NullType(), + T74 t74 = NullType(), + T75 t75 = NullType(), + T76 t76 = NullType(), + T77 t77 = NullType(), + T78 t78 = NullType(), + T79 t79 = NullType(), + T80 t80 = NullType(), + T81 t81 = NullType(), + T82 t82 = NullType(), + T83 t83 = NullType(), + T84 t84 = NullType(), + T85 t85 = NullType(), + T86 t86 = NullType(), + T87 t87 = NullType(), + T88 t88 = NullType(), + T89 t89 = NullType(), + T90 t90 = NullType(), 
+ T91 t91 = NullType(), + T92 t92 = NullType(), + T93 t93 = NullType(), + T94 t94 = NullType(), + T95 t95 = NullType(), + T96 t96 = NullType(), + T97 t97 = NullType(), + T98 t98 = NullType(), + T99 t99 = NullType(), + T100 t100 = NullType(), + T101 t101 = NullType(), + T102 t102 = NullType(), + T103 t103 = NullType(), + T104 t104 = NullType(), + T105 t105 = NullType(), + T106 t106 = NullType(), + T107 t107 = NullType(), + T108 t108 = NullType(), + T109 t109 = NullType(), + T110 t110 = NullType(), + T111 t111 = NullType(), + T112 t112 = NullType(), + T113 t113 = NullType(), + T114 t114 = NullType(), + T115 t115 = NullType(), + T116 t116 = NullType(), + T117 t117 = NullType(), + T118 t118 = NullType(), + T119 t119 = NullType(), + T120 t120 = NullType(), + T121 t121 = NullType(), + T122 t122 = NullType(), + T123 t123 = NullType(), + T124 t124 = NullType(), + T125 t125 = NullType(), + T126 t126 = NullType(), + T127 t127 = NullType()) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + 
SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + SetArg<32, T32>::set(kernel_, t32); + SetArg<33, T33>::set(kernel_, t33); + SetArg<34, T34>::set(kernel_, t34); + SetArg<35, T35>::set(kernel_, t35); + SetArg<36, T36>::set(kernel_, t36); + SetArg<37, T37>::set(kernel_, t37); + SetArg<38, T38>::set(kernel_, t38); + SetArg<39, T39>::set(kernel_, t39); + SetArg<40, T40>::set(kernel_, t40); + SetArg<41, T41>::set(kernel_, t41); + SetArg<42, T42>::set(kernel_, t42); + SetArg<43, T43>::set(kernel_, t43); + SetArg<44, T44>::set(kernel_, t44); + SetArg<45, T45>::set(kernel_, t45); + SetArg<46, T46>::set(kernel_, t46); + SetArg<47, T47>::set(kernel_, t47); + SetArg<48, T48>::set(kernel_, t48); + SetArg<49, T49>::set(kernel_, t49); + SetArg<50, T50>::set(kernel_, t50); + SetArg<51, T51>::set(kernel_, t51); + SetArg<52, T52>::set(kernel_, t52); + SetArg<53, T53>::set(kernel_, t53); + SetArg<54, T54>::set(kernel_, t54); + SetArg<55, T55>::set(kernel_, t55); + SetArg<56, T56>::set(kernel_, t56); + SetArg<57, T57>::set(kernel_, t57); + SetArg<58, T58>::set(kernel_, t58); + SetArg<59, T59>::set(kernel_, t59); + SetArg<60, T60>::set(kernel_, t60); + SetArg<61, T61>::set(kernel_, t61); + SetArg<62, T62>::set(kernel_, t62); + SetArg<63, T63>::set(kernel_, t63); + SetArg<64, T64>::set(kernel_, t64); + SetArg<65, T65>::set(kernel_, t65); + SetArg<66, T66>::set(kernel_, t66); + SetArg<67, T67>::set(kernel_, t67); + SetArg<68, T68>::set(kernel_, t68); + SetArg<69, T69>::set(kernel_, t69); + SetArg<70, T70>::set(kernel_, t70); + SetArg<71, T71>::set(kernel_, t71); + SetArg<72, T72>::set(kernel_, t72); + SetArg<73, T73>::set(kernel_, t73); + SetArg<74, T74>::set(kernel_, t74); + SetArg<75, T75>::set(kernel_, t75); + SetArg<76, T76>::set(kernel_, t76); + SetArg<77, T77>::set(kernel_, t77); + SetArg<78, T78>::set(kernel_, t78); + SetArg<79, T79>::set(kernel_, t79); + SetArg<80, 
T80>::set(kernel_, t80); + SetArg<81, T81>::set(kernel_, t81); + SetArg<82, T82>::set(kernel_, t82); + SetArg<83, T83>::set(kernel_, t83); + SetArg<84, T84>::set(kernel_, t84); + SetArg<85, T85>::set(kernel_, t85); + SetArg<86, T86>::set(kernel_, t86); + SetArg<87, T87>::set(kernel_, t87); + SetArg<88, T88>::set(kernel_, t88); + SetArg<89, T89>::set(kernel_, t89); + SetArg<90, T90>::set(kernel_, t90); + SetArg<91, T91>::set(kernel_, t91); + SetArg<92, T92>::set(kernel_, t92); + SetArg<93, T93>::set(kernel_, t93); + SetArg<94, T94>::set(kernel_, t94); + SetArg<95, T95>::set(kernel_, t95); + SetArg<96, T96>::set(kernel_, t96); + SetArg<97, T97>::set(kernel_, t97); + SetArg<98, T98>::set(kernel_, t98); + SetArg<99, T99>::set(kernel_, t99); + SetArg<100, T100>::set(kernel_, t100); + SetArg<101, T101>::set(kernel_, t101); + SetArg<102, T102>::set(kernel_, t102); + SetArg<103, T103>::set(kernel_, t103); + SetArg<104, T104>::set(kernel_, t104); + SetArg<105, T105>::set(kernel_, t105); + SetArg<106, T106>::set(kernel_, t106); + SetArg<107, T107>::set(kernel_, t107); + SetArg<108, T108>::set(kernel_, t108); + SetArg<109, T109>::set(kernel_, t109); + SetArg<110, T110>::set(kernel_, t110); + SetArg<111, T111>::set(kernel_, t111); + SetArg<112, T112>::set(kernel_, t112); + SetArg<113, T113>::set(kernel_, t113); + SetArg<114, T114>::set(kernel_, t114); + SetArg<115, T115>::set(kernel_, t115); + SetArg<116, T116>::set(kernel_, t116); + SetArg<117, T117>::set(kernel_, t117); + SetArg<118, T118>::set(kernel_, t118); + SetArg<119, T119>::set(kernel_, t119); + SetArg<120, T120>::set(kernel_, t120); + SetArg<121, T121>::set(kernel_, t121); + SetArg<122, T122>::set(kernel_, t122); + SetArg<123, T123>::set(kernel_, t123); + SetArg<124, T124>::set(kernel_, t124); + SetArg<125, T125>::set(kernel_, t125); + SetArg<126, T126>::set(kernel_, t126); + SetArg<127, T127>::set(kernel_, t127); + + // Move any arguments that need moving + MoveToDevice<T0>::move(args.queue_, t0); + 
MoveToDevice<T1>::move(args.queue_, t1); + MoveToDevice<T2>::move(args.queue_, t2); + MoveToDevice<T3>::move(args.queue_, t3); + MoveToDevice<T4>::move(args.queue_, t4); + MoveToDevice<T5>::move(args.queue_, t5); + MoveToDevice<T6>::move(args.queue_, t6); + MoveToDevice<T7>::move(args.queue_, t7); + MoveToDevice<T8>::move(args.queue_, t8); + MoveToDevice<T9>::move(args.queue_, t9); + MoveToDevice<T10>::move(args.queue_, t10); + MoveToDevice<T11>::move(args.queue_, t11); + MoveToDevice<T12>::move(args.queue_, t12); + MoveToDevice<T13>::move(args.queue_, t13); + MoveToDevice<T14>::move(args.queue_, t14); + MoveToDevice<T15>::move(args.queue_, t15); + MoveToDevice<T16>::move(args.queue_, t16); + MoveToDevice<T17>::move(args.queue_, t17); + MoveToDevice<T18>::move(args.queue_, t18); + MoveToDevice<T19>::move(args.queue_, t19); + MoveToDevice<T20>::move(args.queue_, t20); + MoveToDevice<T21>::move(args.queue_, t21); + MoveToDevice<T22>::move(args.queue_, t22); + MoveToDevice<T23>::move(args.queue_, t23); + MoveToDevice<T24>::move(args.queue_, t24); + MoveToDevice<T25>::move(args.queue_, t25); + MoveToDevice<T26>::move(args.queue_, t26); + MoveToDevice<T27>::move(args.queue_, t27); + MoveToDevice<T28>::move(args.queue_, t28); + MoveToDevice<T29>::move(args.queue_, t29); + MoveToDevice<T30>::move(args.queue_, t30); + MoveToDevice<T31>::move(args.queue_, t31); + MoveToDevice<T32>::move(args.queue_, t32); + MoveToDevice<T33>::move(args.queue_, t33); + MoveToDevice<T34>::move(args.queue_, t34); + MoveToDevice<T35>::move(args.queue_, t35); + MoveToDevice<T36>::move(args.queue_, t36); + MoveToDevice<T37>::move(args.queue_, t37); + MoveToDevice<T38>::move(args.queue_, t38); + MoveToDevice<T39>::move(args.queue_, t39); + MoveToDevice<T40>::move(args.queue_, t40); + MoveToDevice<T41>::move(args.queue_, t41); + MoveToDevice<T42>::move(args.queue_, t42); + MoveToDevice<T43>::move(args.queue_, t43); + MoveToDevice<T44>::move(args.queue_, t44); + MoveToDevice<T45>::move(args.queue_, 
t45); + MoveToDevice<T46>::move(args.queue_, t46); + MoveToDevice<T47>::move(args.queue_, t47); + MoveToDevice<T48>::move(args.queue_, t48); + MoveToDevice<T49>::move(args.queue_, t49); + MoveToDevice<T50>::move(args.queue_, t50); + MoveToDevice<T51>::move(args.queue_, t51); + MoveToDevice<T52>::move(args.queue_, t52); + MoveToDevice<T53>::move(args.queue_, t53); + MoveToDevice<T54>::move(args.queue_, t54); + MoveToDevice<T55>::move(args.queue_, t55); + MoveToDevice<T56>::move(args.queue_, t56); + MoveToDevice<T57>::move(args.queue_, t57); + MoveToDevice<T58>::move(args.queue_, t58); + MoveToDevice<T59>::move(args.queue_, t59); + MoveToDevice<T60>::move(args.queue_, t60); + MoveToDevice<T61>::move(args.queue_, t61); + MoveToDevice<T62>::move(args.queue_, t62); + MoveToDevice<T63>::move(args.queue_, t63); + MoveToDevice<T64>::move(args.queue_, t64); + MoveToDevice<T65>::move(args.queue_, t65); + MoveToDevice<T66>::move(args.queue_, t66); + MoveToDevice<T67>::move(args.queue_, t67); + MoveToDevice<T68>::move(args.queue_, t68); + MoveToDevice<T69>::move(args.queue_, t69); + MoveToDevice<T70>::move(args.queue_, t70); + MoveToDevice<T71>::move(args.queue_, t71); + MoveToDevice<T72>::move(args.queue_, t72); + MoveToDevice<T73>::move(args.queue_, t73); + MoveToDevice<T74>::move(args.queue_, t74); + MoveToDevice<T75>::move(args.queue_, t75); + MoveToDevice<T76>::move(args.queue_, t76); + MoveToDevice<T77>::move(args.queue_, t77); + MoveToDevice<T78>::move(args.queue_, t78); + MoveToDevice<T79>::move(args.queue_, t79); + MoveToDevice<T80>::move(args.queue_, t80); + MoveToDevice<T81>::move(args.queue_, t81); + MoveToDevice<T82>::move(args.queue_, t82); + MoveToDevice<T83>::move(args.queue_, t83); + MoveToDevice<T84>::move(args.queue_, t84); + MoveToDevice<T85>::move(args.queue_, t85); + MoveToDevice<T86>::move(args.queue_, t86); + MoveToDevice<T87>::move(args.queue_, t87); + MoveToDevice<T88>::move(args.queue_, t88); + MoveToDevice<T89>::move(args.queue_, t89); + 
MoveToDevice<T90>::move(args.queue_, t90); + MoveToDevice<T91>::move(args.queue_, t91); + MoveToDevice<T92>::move(args.queue_, t92); + MoveToDevice<T93>::move(args.queue_, t93); + MoveToDevice<T94>::move(args.queue_, t94); + MoveToDevice<T95>::move(args.queue_, t95); + MoveToDevice<T96>::move(args.queue_, t96); + MoveToDevice<T97>::move(args.queue_, t97); + MoveToDevice<T98>::move(args.queue_, t98); + MoveToDevice<T99>::move(args.queue_, t99); + MoveToDevice<T100>::move(args.queue_, t100); + MoveToDevice<T101>::move(args.queue_, t101); + MoveToDevice<T102>::move(args.queue_, t102); + MoveToDevice<T103>::move(args.queue_, t103); + MoveToDevice<T104>::move(args.queue_, t104); + MoveToDevice<T105>::move(args.queue_, t105); + MoveToDevice<T106>::move(args.queue_, t106); + MoveToDevice<T107>::move(args.queue_, t107); + MoveToDevice<T108>::move(args.queue_, t108); + MoveToDevice<T109>::move(args.queue_, t109); + MoveToDevice<T110>::move(args.queue_, t110); + MoveToDevice<T111>::move(args.queue_, t111); + MoveToDevice<T112>::move(args.queue_, t112); + MoveToDevice<T113>::move(args.queue_, t113); + MoveToDevice<T114>::move(args.queue_, t114); + MoveToDevice<T115>::move(args.queue_, t115); + MoveToDevice<T116>::move(args.queue_, t116); + MoveToDevice<T117>::move(args.queue_, t117); + MoveToDevice<T118>::move(args.queue_, t118); + MoveToDevice<T119>::move(args.queue_, t119); + MoveToDevice<T120>::move(args.queue_, t120); + MoveToDevice<T121>::move(args.queue_, t121); + MoveToDevice<T122>::move(args.queue_, t122); + MoveToDevice<T123>::move(args.queue_, t123); + MoveToDevice<T124>::move(args.queue_, t124); + MoveToDevice<T125>::move(args.queue_, t125); + MoveToDevice<T126>::move(args.queue_, t126); + MoveToDevice<T127>::move(args.queue_, t127); + + // Build a one-element wait list holding a copy of waitEvent. + // The iterator range [&waitEvent, &waitEvent) is empty, so constructing the + // list from it left the wait list passed below without any events. + VECTOR_CLASS<Event> events(1, waitEvent); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &events, + &event); + + // Move any arguments that need moving + 
MoveFromDevice<T0>::move(args.queue_, t0); + MoveFromDevice<T1>::move(args.queue_, t1); + MoveFromDevice<T2>::move(args.queue_, t2); + MoveFromDevice<T3>::move(args.queue_, t3); + MoveFromDevice<T4>::move(args.queue_, t4); + MoveFromDevice<T5>::move(args.queue_, t5); + MoveFromDevice<T6>::move(args.queue_, t6); + MoveFromDevice<T7>::move(args.queue_, t7); + MoveFromDevice<T8>::move(args.queue_, t8); + MoveFromDevice<T9>::move(args.queue_, t9); + MoveFromDevice<T10>::move(args.queue_, t10); + MoveFromDevice<T11>::move(args.queue_, t11); + MoveFromDevice<T12>::move(args.queue_, t12); + MoveFromDevice<T13>::move(args.queue_, t13); + MoveFromDevice<T14>::move(args.queue_, t14); + MoveFromDevice<T15>::move(args.queue_, t15); + MoveFromDevice<T16>::move(args.queue_, t16); + MoveFromDevice<T17>::move(args.queue_, t17); + MoveFromDevice<T18>::move(args.queue_, t18); + MoveFromDevice<T19>::move(args.queue_, t19); + MoveFromDevice<T20>::move(args.queue_, t20); + MoveFromDevice<T21>::move(args.queue_, t21); + MoveFromDevice<T22>::move(args.queue_, t22); + MoveFromDevice<T23>::move(args.queue_, t23); + MoveFromDevice<T24>::move(args.queue_, t24); + MoveFromDevice<T25>::move(args.queue_, t25); + MoveFromDevice<T26>::move(args.queue_, t26); + MoveFromDevice<T27>::move(args.queue_, t27); + MoveFromDevice<T28>::move(args.queue_, t28); + MoveFromDevice<T29>::move(args.queue_, t29); + MoveFromDevice<T30>::move(args.queue_, t30); + MoveFromDevice<T31>::move(args.queue_, t31); + MoveFromDevice<T32>::move(args.queue_, t32); + MoveFromDevice<T33>::move(args.queue_, t33); + MoveFromDevice<T34>::move(args.queue_, t34); + MoveFromDevice<T35>::move(args.queue_, t35); + MoveFromDevice<T36>::move(args.queue_, t36); + MoveFromDevice<T37>::move(args.queue_, t37); + MoveFromDevice<T38>::move(args.queue_, t38); + MoveFromDevice<T39>::move(args.queue_, t39); + MoveFromDevice<T40>::move(args.queue_, t40); + MoveFromDevice<T41>::move(args.queue_, t41); + MoveFromDevice<T42>::move(args.queue_, t42); 
+ MoveFromDevice<T43>::move(args.queue_, t43); + MoveFromDevice<T44>::move(args.queue_, t44); + MoveFromDevice<T45>::move(args.queue_, t45); + MoveFromDevice<T46>::move(args.queue_, t46); + MoveFromDevice<T47>::move(args.queue_, t47); + MoveFromDevice<T48>::move(args.queue_, t48); + MoveFromDevice<T49>::move(args.queue_, t49); + MoveFromDevice<T50>::move(args.queue_, t50); + MoveFromDevice<T51>::move(args.queue_, t51); + MoveFromDevice<T52>::move(args.queue_, t52); + MoveFromDevice<T53>::move(args.queue_, t53); + MoveFromDevice<T54>::move(args.queue_, t54); + MoveFromDevice<T55>::move(args.queue_, t55); + MoveFromDevice<T56>::move(args.queue_, t56); + MoveFromDevice<T57>::move(args.queue_, t57); + MoveFromDevice<T58>::move(args.queue_, t58); + MoveFromDevice<T59>::move(args.queue_, t59); + MoveFromDevice<T60>::move(args.queue_, t60); + MoveFromDevice<T61>::move(args.queue_, t61); + MoveFromDevice<T62>::move(args.queue_, t62); + MoveFromDevice<T63>::move(args.queue_, t63); + MoveFromDevice<T64>::move(args.queue_, t64); + MoveFromDevice<T65>::move(args.queue_, t65); + MoveFromDevice<T66>::move(args.queue_, t66); + MoveFromDevice<T67>::move(args.queue_, t67); + MoveFromDevice<T68>::move(args.queue_, t68); + MoveFromDevice<T69>::move(args.queue_, t69); + MoveFromDevice<T70>::move(args.queue_, t70); + MoveFromDevice<T71>::move(args.queue_, t71); + MoveFromDevice<T72>::move(args.queue_, t72); + MoveFromDevice<T73>::move(args.queue_, t73); + MoveFromDevice<T74>::move(args.queue_, t74); + MoveFromDevice<T75>::move(args.queue_, t75); + MoveFromDevice<T76>::move(args.queue_, t76); + MoveFromDevice<T77>::move(args.queue_, t77); + MoveFromDevice<T78>::move(args.queue_, t78); + MoveFromDevice<T79>::move(args.queue_, t79); + MoveFromDevice<T80>::move(args.queue_, t80); + MoveFromDevice<T81>::move(args.queue_, t81); + MoveFromDevice<T82>::move(args.queue_, t82); + MoveFromDevice<T83>::move(args.queue_, t83); + MoveFromDevice<T84>::move(args.queue_, t84); + 
MoveFromDevice<T85>::move(args.queue_, t85); + MoveFromDevice<T86>::move(args.queue_, t86); + MoveFromDevice<T87>::move(args.queue_, t87); + MoveFromDevice<T88>::move(args.queue_, t88); + MoveFromDevice<T89>::move(args.queue_, t89); + MoveFromDevice<T90>::move(args.queue_, t90); + MoveFromDevice<T91>::move(args.queue_, t91); + MoveFromDevice<T92>::move(args.queue_, t92); + MoveFromDevice<T93>::move(args.queue_, t93); + MoveFromDevice<T94>::move(args.queue_, t94); + MoveFromDevice<T95>::move(args.queue_, t95); + MoveFromDevice<T96>::move(args.queue_, t96); + MoveFromDevice<T97>::move(args.queue_, t97); + MoveFromDevice<T98>::move(args.queue_, t98); + MoveFromDevice<T99>::move(args.queue_, t99); + MoveFromDevice<T100>::move(args.queue_, t100); + MoveFromDevice<T101>::move(args.queue_, t101); + MoveFromDevice<T102>::move(args.queue_, t102); + MoveFromDevice<T103>::move(args.queue_, t103); + MoveFromDevice<T104>::move(args.queue_, t104); + MoveFromDevice<T105>::move(args.queue_, t105); + MoveFromDevice<T106>::move(args.queue_, t106); + MoveFromDevice<T107>::move(args.queue_, t107); + MoveFromDevice<T108>::move(args.queue_, t108); + MoveFromDevice<T109>::move(args.queue_, t109); + MoveFromDevice<T110>::move(args.queue_, t110); + MoveFromDevice<T111>::move(args.queue_, t111); + MoveFromDevice<T112>::move(args.queue_, t112); + MoveFromDevice<T113>::move(args.queue_, t113); + MoveFromDevice<T114>::move(args.queue_, t114); + MoveFromDevice<T115>::move(args.queue_, t115); + MoveFromDevice<T116>::move(args.queue_, t116); + MoveFromDevice<T117>::move(args.queue_, t117); + MoveFromDevice<T118>::move(args.queue_, t118); + MoveFromDevice<T119>::move(args.queue_, t119); + MoveFromDevice<T120>::move(args.queue_, t120); + MoveFromDevice<T121>::move(args.queue_, t121); + MoveFromDevice<T122>::move(args.queue_, t122); + MoveFromDevice<T123>::move(args.queue_, t123); + MoveFromDevice<T124>::move(args.queue_, t124); + MoveFromDevice<T125>::move(args.queue_, t125); + 
MoveFromDevice<T126>::move(args.queue_, t126); + MoveFromDevice<T127>::move(args.queue_, t127); + return event; + } + + Event operator() ( + const EnqueueArgs& args, + const VECTOR_CLASS<Event>& waitEvents, + T0 t0, + T1 t1 = NullType(), + T2 t2 = NullType(), + T3 t3 = NullType(), + T4 t4 = NullType(), + T5 t5 = NullType(), + T6 t6 = NullType(), + T7 t7 = NullType(), + T8 t8 = NullType(), + T9 t9 = NullType(), + T10 t10 = NullType(), + T11 t11 = NullType(), + T12 t12 = NullType(), + T13 t13 = NullType(), + T14 t14 = NullType(), + T15 t15 = NullType(), + T16 t16 = NullType(), + T17 t17 = NullType(), + T18 t18 = NullType(), + T19 t19 = NullType(), + T20 t20 = NullType(), + T21 t21 = NullType(), + T22 t22 = NullType(), + T23 t23 = NullType(), + T24 t24 = NullType(), + T25 t25 = NullType(), + T26 t26 = NullType(), + T27 t27 = NullType(), + T28 t28 = NullType(), + T29 t29 = NullType(), + T30 t30 = NullType(), + T31 t31 = NullType(), + T32 t32 = NullType(), + T33 t33 = NullType(), + T34 t34 = NullType(), + T35 t35 = NullType(), + T36 t36 = NullType(), + T37 t37 = NullType(), + T38 t38 = NullType(), + T39 t39 = NullType(), + T40 t40 = NullType(), + T41 t41 = NullType(), + T42 t42 = NullType(), + T43 t43 = NullType(), + T44 t44 = NullType(), + T45 t45 = NullType(), + T46 t46 = NullType(), + T47 t47 = NullType(), + T48 t48 = NullType(), + T49 t49 = NullType(), + T50 t50 = NullType(), + T51 t51 = NullType(), + T52 t52 = NullType(), + T53 t53 = NullType(), + T54 t54 = NullType(), + T55 t55 = NullType(), + T56 t56 = NullType(), + T57 t57 = NullType(), + T58 t58 = NullType(), + T59 t59 = NullType(), + T60 t60 = NullType(), + T61 t61 = NullType(), + T62 t62 = NullType(), + T63 t63 = NullType(), + T64 t64 = NullType(), + T65 t65 = NullType(), + T66 t66 = NullType(), + T67 t67 = NullType(), + T68 t68 = NullType(), + T69 t69 = NullType(), + T70 t70 = NullType(), + T71 t71 = NullType(), + T72 t72 = NullType(), + T73 t73 = NullType(), + T74 t74 = NullType(), + T75 t75 = NullType(), + 
T76 t76 = NullType(), + T77 t77 = NullType(), + T78 t78 = NullType(), + T79 t79 = NullType(), + T80 t80 = NullType(), + T81 t81 = NullType(), + T82 t82 = NullType(), + T83 t83 = NullType(), + T84 t84 = NullType(), + T85 t85 = NullType(), + T86 t86 = NullType(), + T87 t87 = NullType(), + T88 t88 = NullType(), + T89 t89 = NullType(), + T90 t90 = NullType(), + T91 t91 = NullType(), + T92 t92 = NullType(), + T93 t93 = NullType(), + T94 t94 = NullType(), + T95 t95 = NullType(), + T96 t96 = NullType(), + T97 t97 = NullType(), + T98 t98 = NullType(), + T99 t99 = NullType(), + T100 t100 = NullType(), + T101 t101 = NullType(), + T102 t102 = NullType(), + T103 t103 = NullType(), + T104 t104 = NullType(), + T105 t105 = NullType(), + T106 t106 = NullType(), + T107 t107 = NullType(), + T108 t108 = NullType(), + T109 t109 = NullType(), + T110 t110 = NullType(), + T111 t111 = NullType(), + T112 t112 = NullType(), + T113 t113 = NullType(), + T114 t114 = NullType(), + T115 t115 = NullType(), + T116 t116 = NullType(), + T117 t117 = NullType(), + T118 t118 = NullType(), + T119 t119 = NullType(), + T120 t120 = NullType(), + T121 t121 = NullType(), + T122 t122 = NullType(), + T123 t123 = NullType(), + T124 t124 = NullType(), + T125 t125 = NullType(), + T126 t126 = NullType(), + T127 t127 = NullType()) + { + Event event; + SetArg<0, T0>::set(kernel_, t0); + SetArg<1, T1>::set(kernel_, t1); + SetArg<2, T2>::set(kernel_, t2); + SetArg<3, T3>::set(kernel_, t3); + SetArg<4, T4>::set(kernel_, t4); + SetArg<5, T5>::set(kernel_, t5); + SetArg<6, T6>::set(kernel_, t6); + SetArg<7, T7>::set(kernel_, t7); + SetArg<8, T8>::set(kernel_, t8); + SetArg<9, T9>::set(kernel_, t9); + SetArg<10, T10>::set(kernel_, t10); + SetArg<11, T11>::set(kernel_, t11); + SetArg<12, T12>::set(kernel_, t12); + SetArg<13, T13>::set(kernel_, t13); + SetArg<14, T14>::set(kernel_, t14); + SetArg<15, T15>::set(kernel_, t15); + SetArg<16, T16>::set(kernel_, t16); + SetArg<17, T17>::set(kernel_, t17); + SetArg<18, 
T18>::set(kernel_, t18); + SetArg<19, T19>::set(kernel_, t19); + SetArg<20, T20>::set(kernel_, t20); + SetArg<21, T21>::set(kernel_, t21); + SetArg<22, T22>::set(kernel_, t22); + SetArg<23, T23>::set(kernel_, t23); + SetArg<24, T24>::set(kernel_, t24); + SetArg<25, T25>::set(kernel_, t25); + SetArg<26, T26>::set(kernel_, t26); + SetArg<27, T27>::set(kernel_, t27); + SetArg<28, T28>::set(kernel_, t28); + SetArg<29, T29>::set(kernel_, t29); + SetArg<30, T30>::set(kernel_, t30); + SetArg<31, T31>::set(kernel_, t31); + SetArg<32, T32>::set(kernel_, t32); + SetArg<33, T33>::set(kernel_, t33); + SetArg<34, T34>::set(kernel_, t34); + SetArg<35, T35>::set(kernel_, t35); + SetArg<36, T36>::set(kernel_, t36); + SetArg<37, T37>::set(kernel_, t37); + SetArg<38, T38>::set(kernel_, t38); + SetArg<39, T39>::set(kernel_, t39); + SetArg<40, T40>::set(kernel_, t40); + SetArg<41, T41>::set(kernel_, t41); + SetArg<42, T42>::set(kernel_, t42); + SetArg<43, T43>::set(kernel_, t43); + SetArg<44, T44>::set(kernel_, t44); + SetArg<45, T45>::set(kernel_, t45); + SetArg<46, T46>::set(kernel_, t46); + SetArg<47, T47>::set(kernel_, t47); + SetArg<48, T48>::set(kernel_, t48); + SetArg<49, T49>::set(kernel_, t49); + SetArg<50, T50>::set(kernel_, t50); + SetArg<51, T51>::set(kernel_, t51); + SetArg<52, T52>::set(kernel_, t52); + SetArg<53, T53>::set(kernel_, t53); + SetArg<54, T54>::set(kernel_, t54); + SetArg<55, T55>::set(kernel_, t55); + SetArg<56, T56>::set(kernel_, t56); + SetArg<57, T57>::set(kernel_, t57); + SetArg<58, T58>::set(kernel_, t58); + SetArg<59, T59>::set(kernel_, t59); + SetArg<60, T60>::set(kernel_, t60); + SetArg<61, T61>::set(kernel_, t61); + SetArg<62, T62>::set(kernel_, t62); + SetArg<63, T63>::set(kernel_, t63); + SetArg<64, T64>::set(kernel_, t64); + SetArg<65, T65>::set(kernel_, t65); + SetArg<66, T66>::set(kernel_, t66); + SetArg<67, T67>::set(kernel_, t67); + SetArg<68, T68>::set(kernel_, t68); + SetArg<69, T69>::set(kernel_, t69); + SetArg<70, T70>::set(kernel_, 
t70); + SetArg<71, T71>::set(kernel_, t71); + SetArg<72, T72>::set(kernel_, t72); + SetArg<73, T73>::set(kernel_, t73); + SetArg<74, T74>::set(kernel_, t74); + SetArg<75, T75>::set(kernel_, t75); + SetArg<76, T76>::set(kernel_, t76); + SetArg<77, T77>::set(kernel_, t77); + SetArg<78, T78>::set(kernel_, t78); + SetArg<79, T79>::set(kernel_, t79); + SetArg<80, T80>::set(kernel_, t80); + SetArg<81, T81>::set(kernel_, t81); + SetArg<82, T82>::set(kernel_, t82); + SetArg<83, T83>::set(kernel_, t83); + SetArg<84, T84>::set(kernel_, t84); + SetArg<85, T85>::set(kernel_, t85); + SetArg<86, T86>::set(kernel_, t86); + SetArg<87, T87>::set(kernel_, t87); + SetArg<88, T88>::set(kernel_, t88); + SetArg<89, T89>::set(kernel_, t89); + SetArg<90, T90>::set(kernel_, t90); + SetArg<91, T91>::set(kernel_, t91); + SetArg<92, T92>::set(kernel_, t92); + SetArg<93, T93>::set(kernel_, t93); + SetArg<94, T94>::set(kernel_, t94); + SetArg<95, T95>::set(kernel_, t95); + SetArg<96, T96>::set(kernel_, t96); + SetArg<97, T97>::set(kernel_, t97); + SetArg<98, T98>::set(kernel_, t98); + SetArg<99, T99>::set(kernel_, t99); + SetArg<100, T100>::set(kernel_, t100); + SetArg<101, T101>::set(kernel_, t101); + SetArg<102, T102>::set(kernel_, t102); + SetArg<103, T103>::set(kernel_, t103); + SetArg<104, T104>::set(kernel_, t104); + SetArg<105, T105>::set(kernel_, t105); + SetArg<106, T106>::set(kernel_, t106); + SetArg<107, T107>::set(kernel_, t107); + SetArg<108, T108>::set(kernel_, t108); + SetArg<109, T109>::set(kernel_, t109); + SetArg<110, T110>::set(kernel_, t110); + SetArg<111, T111>::set(kernel_, t111); + SetArg<112, T112>::set(kernel_, t112); + SetArg<113, T113>::set(kernel_, t113); + SetArg<114, T114>::set(kernel_, t114); + SetArg<115, T115>::set(kernel_, t115); + SetArg<116, T116>::set(kernel_, t116); + SetArg<117, T117>::set(kernel_, t117); + SetArg<118, T118>::set(kernel_, t118); + SetArg<119, T119>::set(kernel_, t119); + SetArg<120, T120>::set(kernel_, t120); + SetArg<121, 
T121>::set(kernel_, t121); + SetArg<122, T122>::set(kernel_, t122); + SetArg<123, T123>::set(kernel_, t123); + SetArg<124, T124>::set(kernel_, t124); + SetArg<125, T125>::set(kernel_, t125); + SetArg<126, T126>::set(kernel_, t126); + SetArg<127, T127>::set(kernel_, t127); + + // Move any arguments that need moving + MoveToDevice<T0>::move(args.queue_, t0); + MoveToDevice<T1>::move(args.queue_, t1); + MoveToDevice<T2>::move(args.queue_, t2); + MoveToDevice<T3>::move(args.queue_, t3); + MoveToDevice<T4>::move(args.queue_, t4); + MoveToDevice<T5>::move(args.queue_, t5); + MoveToDevice<T6>::move(args.queue_, t6); + MoveToDevice<T7>::move(args.queue_, t7); + MoveToDevice<T8>::move(args.queue_, t8); + MoveToDevice<T9>::move(args.queue_, t9); + MoveToDevice<T10>::move(args.queue_, t10); + MoveToDevice<T11>::move(args.queue_, t11); + MoveToDevice<T12>::move(args.queue_, t12); + MoveToDevice<T13>::move(args.queue_, t13); + MoveToDevice<T14>::move(args.queue_, t14); + MoveToDevice<T15>::move(args.queue_, t15); + MoveToDevice<T16>::move(args.queue_, t16); + MoveToDevice<T17>::move(args.queue_, t17); + MoveToDevice<T18>::move(args.queue_, t18); + MoveToDevice<T19>::move(args.queue_, t19); + MoveToDevice<T20>::move(args.queue_, t20); + MoveToDevice<T21>::move(args.queue_, t21); + MoveToDevice<T22>::move(args.queue_, t22); + MoveToDevice<T23>::move(args.queue_, t23); + MoveToDevice<T24>::move(args.queue_, t24); + MoveToDevice<T25>::move(args.queue_, t25); + MoveToDevice<T26>::move(args.queue_, t26); + MoveToDevice<T27>::move(args.queue_, t27); + MoveToDevice<T28>::move(args.queue_, t28); + MoveToDevice<T29>::move(args.queue_, t29); + MoveToDevice<T30>::move(args.queue_, t30); + MoveToDevice<T31>::move(args.queue_, t31); + MoveToDevice<T32>::move(args.queue_, t32); + MoveToDevice<T33>::move(args.queue_, t33); + MoveToDevice<T34>::move(args.queue_, t34); + MoveToDevice<T35>::move(args.queue_, t35); + MoveToDevice<T36>::move(args.queue_, t36); + MoveToDevice<T37>::move(args.queue_, 
t37); + MoveToDevice<T38>::move(args.queue_, t38); + MoveToDevice<T39>::move(args.queue_, t39); + MoveToDevice<T40>::move(args.queue_, t40); + MoveToDevice<T41>::move(args.queue_, t41); + MoveToDevice<T42>::move(args.queue_, t42); + MoveToDevice<T43>::move(args.queue_, t43); + MoveToDevice<T44>::move(args.queue_, t44); + MoveToDevice<T45>::move(args.queue_, t45); + MoveToDevice<T46>::move(args.queue_, t46); + MoveToDevice<T47>::move(args.queue_, t47); + MoveToDevice<T48>::move(args.queue_, t48); + MoveToDevice<T49>::move(args.queue_, t49); + MoveToDevice<T50>::move(args.queue_, t50); + MoveToDevice<T51>::move(args.queue_, t51); + MoveToDevice<T52>::move(args.queue_, t52); + MoveToDevice<T53>::move(args.queue_, t53); + MoveToDevice<T54>::move(args.queue_, t54); + MoveToDevice<T55>::move(args.queue_, t55); + MoveToDevice<T56>::move(args.queue_, t56); + MoveToDevice<T57>::move(args.queue_, t57); + MoveToDevice<T58>::move(args.queue_, t58); + MoveToDevice<T59>::move(args.queue_, t59); + MoveToDevice<T60>::move(args.queue_, t60); + MoveToDevice<T61>::move(args.queue_, t61); + MoveToDevice<T62>::move(args.queue_, t62); + MoveToDevice<T63>::move(args.queue_, t63); + MoveToDevice<T64>::move(args.queue_, t64); + MoveToDevice<T65>::move(args.queue_, t65); + MoveToDevice<T66>::move(args.queue_, t66); + MoveToDevice<T67>::move(args.queue_, t67); + MoveToDevice<T68>::move(args.queue_, t68); + MoveToDevice<T69>::move(args.queue_, t69); + MoveToDevice<T70>::move(args.queue_, t70); + MoveToDevice<T71>::move(args.queue_, t71); + MoveToDevice<T72>::move(args.queue_, t72); + MoveToDevice<T73>::move(args.queue_, t73); + MoveToDevice<T74>::move(args.queue_, t74); + MoveToDevice<T75>::move(args.queue_, t75); + MoveToDevice<T76>::move(args.queue_, t76); + MoveToDevice<T77>::move(args.queue_, t77); + MoveToDevice<T78>::move(args.queue_, t78); + MoveToDevice<T79>::move(args.queue_, t79); + MoveToDevice<T80>::move(args.queue_, t80); + MoveToDevice<T81>::move(args.queue_, t81); + 
MoveToDevice<T82>::move(args.queue_, t82); + MoveToDevice<T83>::move(args.queue_, t83); + MoveToDevice<T84>::move(args.queue_, t84); + MoveToDevice<T85>::move(args.queue_, t85); + MoveToDevice<T86>::move(args.queue_, t86); + MoveToDevice<T87>::move(args.queue_, t87); + MoveToDevice<T88>::move(args.queue_, t88); + MoveToDevice<T89>::move(args.queue_, t89); + MoveToDevice<T90>::move(args.queue_, t90); + MoveToDevice<T91>::move(args.queue_, t91); + MoveToDevice<T92>::move(args.queue_, t92); + MoveToDevice<T93>::move(args.queue_, t93); + MoveToDevice<T94>::move(args.queue_, t94); + MoveToDevice<T95>::move(args.queue_, t95); + MoveToDevice<T96>::move(args.queue_, t96); + MoveToDevice<T97>::move(args.queue_, t97); + MoveToDevice<T98>::move(args.queue_, t98); + MoveToDevice<T99>::move(args.queue_, t99); + MoveToDevice<T100>::move(args.queue_, t100); + MoveToDevice<T101>::move(args.queue_, t101); + MoveToDevice<T102>::move(args.queue_, t102); + MoveToDevice<T103>::move(args.queue_, t103); + MoveToDevice<T104>::move(args.queue_, t104); + MoveToDevice<T105>::move(args.queue_, t105); + MoveToDevice<T106>::move(args.queue_, t106); + MoveToDevice<T107>::move(args.queue_, t107); + MoveToDevice<T108>::move(args.queue_, t108); + MoveToDevice<T109>::move(args.queue_, t109); + MoveToDevice<T110>::move(args.queue_, t110); + MoveToDevice<T111>::move(args.queue_, t111); + MoveToDevice<T112>::move(args.queue_, t112); + MoveToDevice<T113>::move(args.queue_, t113); + MoveToDevice<T114>::move(args.queue_, t114); + MoveToDevice<T115>::move(args.queue_, t115); + MoveToDevice<T116>::move(args.queue_, t116); + MoveToDevice<T117>::move(args.queue_, t117); + MoveToDevice<T118>::move(args.queue_, t118); + MoveToDevice<T119>::move(args.queue_, t119); + MoveToDevice<T120>::move(args.queue_, t120); + MoveToDevice<T121>::move(args.queue_, t121); + MoveToDevice<T122>::move(args.queue_, t122); + MoveToDevice<T123>::move(args.queue_, t123); + MoveToDevice<T124>::move(args.queue_, t124); + 
MoveToDevice<T125>::move(args.queue_, t125); + MoveToDevice<T126>::move(args.queue_, t126); + MoveToDevice<T127>::move(args.queue_, t127); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &waitEvents, + &event); + + // Move any arguments that need moving + MoveFromDevice<T0>::move(args.queue_, t0); + MoveFromDevice<T1>::move(args.queue_, t1); + MoveFromDevice<T2>::move(args.queue_, t2); + MoveFromDevice<T3>::move(args.queue_, t3); + MoveFromDevice<T4>::move(args.queue_, t4); + MoveFromDevice<T5>::move(args.queue_, t5); + MoveFromDevice<T6>::move(args.queue_, t6); + MoveFromDevice<T7>::move(args.queue_, t7); + MoveFromDevice<T8>::move(args.queue_, t8); + MoveFromDevice<T9>::move(args.queue_, t9); + MoveFromDevice<T10>::move(args.queue_, t10); + MoveFromDevice<T11>::move(args.queue_, t11); + MoveFromDevice<T12>::move(args.queue_, t12); + MoveFromDevice<T13>::move(args.queue_, t13); + MoveFromDevice<T14>::move(args.queue_, t14); + MoveFromDevice<T15>::move(args.queue_, t15); + MoveFromDevice<T16>::move(args.queue_, t16); + MoveFromDevice<T17>::move(args.queue_, t17); + MoveFromDevice<T18>::move(args.queue_, t18); + MoveFromDevice<T19>::move(args.queue_, t19); + MoveFromDevice<T20>::move(args.queue_, t20); + MoveFromDevice<T21>::move(args.queue_, t21); + MoveFromDevice<T22>::move(args.queue_, t22); + MoveFromDevice<T23>::move(args.queue_, t23); + MoveFromDevice<T24>::move(args.queue_, t24); + MoveFromDevice<T25>::move(args.queue_, t25); + MoveFromDevice<T26>::move(args.queue_, t26); + MoveFromDevice<T27>::move(args.queue_, t27); + MoveFromDevice<T28>::move(args.queue_, t28); + MoveFromDevice<T29>::move(args.queue_, t29); + MoveFromDevice<T30>::move(args.queue_, t30); + MoveFromDevice<T31>::move(args.queue_, t31); + MoveFromDevice<T32>::move(args.queue_, t32); + MoveFromDevice<T33>::move(args.queue_, t33); + MoveFromDevice<T34>::move(args.queue_, t34); + MoveFromDevice<T35>::move(args.queue_, t35); + 
MoveFromDevice<T36>::move(args.queue_, t36); + MoveFromDevice<T37>::move(args.queue_, t37); + MoveFromDevice<T38>::move(args.queue_, t38); + MoveFromDevice<T39>::move(args.queue_, t39); + MoveFromDevice<T40>::move(args.queue_, t40); + MoveFromDevice<T41>::move(args.queue_, t41); + MoveFromDevice<T42>::move(args.queue_, t42); + MoveFromDevice<T43>::move(args.queue_, t43); + MoveFromDevice<T44>::move(args.queue_, t44); + MoveFromDevice<T45>::move(args.queue_, t45); + MoveFromDevice<T46>::move(args.queue_, t46); + MoveFromDevice<T47>::move(args.queue_, t47); + MoveFromDevice<T48>::move(args.queue_, t48); + MoveFromDevice<T49>::move(args.queue_, t49); + MoveFromDevice<T50>::move(args.queue_, t50); + MoveFromDevice<T51>::move(args.queue_, t51); + MoveFromDevice<T52>::move(args.queue_, t52); + MoveFromDevice<T53>::move(args.queue_, t53); + MoveFromDevice<T54>::move(args.queue_, t54); + MoveFromDevice<T55>::move(args.queue_, t55); + MoveFromDevice<T56>::move(args.queue_, t56); + MoveFromDevice<T57>::move(args.queue_, t57); + MoveFromDevice<T58>::move(args.queue_, t58); + MoveFromDevice<T59>::move(args.queue_, t59); + MoveFromDevice<T60>::move(args.queue_, t60); + MoveFromDevice<T61>::move(args.queue_, t61); + MoveFromDevice<T62>::move(args.queue_, t62); + MoveFromDevice<T63>::move(args.queue_, t63); + MoveFromDevice<T64>::move(args.queue_, t64); + MoveFromDevice<T65>::move(args.queue_, t65); + MoveFromDevice<T66>::move(args.queue_, t66); + MoveFromDevice<T67>::move(args.queue_, t67); + MoveFromDevice<T68>::move(args.queue_, t68); + MoveFromDevice<T69>::move(args.queue_, t69); + MoveFromDevice<T70>::move(args.queue_, t70); + MoveFromDevice<T71>::move(args.queue_, t71); + MoveFromDevice<T72>::move(args.queue_, t72); + MoveFromDevice<T73>::move(args.queue_, t73); + MoveFromDevice<T74>::move(args.queue_, t74); + MoveFromDevice<T75>::move(args.queue_, t75); + MoveFromDevice<T76>::move(args.queue_, t76); + MoveFromDevice<T77>::move(args.queue_, t77); + 
MoveFromDevice<T78>::move(args.queue_, t78); + MoveFromDevice<T79>::move(args.queue_, t79); + MoveFromDevice<T80>::move(args.queue_, t80); + MoveFromDevice<T81>::move(args.queue_, t81); + MoveFromDevice<T82>::move(args.queue_, t82); + MoveFromDevice<T83>::move(args.queue_, t83); + MoveFromDevice<T84>::move(args.queue_, t84); + MoveFromDevice<T85>::move(args.queue_, t85); + MoveFromDevice<T86>::move(args.queue_, t86); + MoveFromDevice<T87>::move(args.queue_, t87); + MoveFromDevice<T88>::move(args.queue_, t88); + MoveFromDevice<T89>::move(args.queue_, t89); + MoveFromDevice<T90>::move(args.queue_, t90); + MoveFromDevice<T91>::move(args.queue_, t91); + MoveFromDevice<T92>::move(args.queue_, t92); + MoveFromDevice<T93>::move(args.queue_, t93); + MoveFromDevice<T94>::move(args.queue_, t94); + MoveFromDevice<T95>::move(args.queue_, t95); + MoveFromDevice<T96>::move(args.queue_, t96); + MoveFromDevice<T97>::move(args.queue_, t97); + MoveFromDevice<T98>::move(args.queue_, t98); + MoveFromDevice<T99>::move(args.queue_, t99); + MoveFromDevice<T100>::move(args.queue_, t100); + MoveFromDevice<T101>::move(args.queue_, t101); + MoveFromDevice<T102>::move(args.queue_, t102); + MoveFromDevice<T103>::move(args.queue_, t103); + MoveFromDevice<T104>::move(args.queue_, t104); + MoveFromDevice<T105>::move(args.queue_, t105); + MoveFromDevice<T106>::move(args.queue_, t106); + MoveFromDevice<T107>::move(args.queue_, t107); + MoveFromDevice<T108>::move(args.queue_, t108); + MoveFromDevice<T109>::move(args.queue_, t109); + MoveFromDevice<T110>::move(args.queue_, t110); + MoveFromDevice<T111>::move(args.queue_, t111); + MoveFromDevice<T112>::move(args.queue_, t112); + MoveFromDevice<T113>::move(args.queue_, t113); + MoveFromDevice<T114>::move(args.queue_, t114); + MoveFromDevice<T115>::move(args.queue_, t115); + MoveFromDevice<T116>::move(args.queue_, t116); + MoveFromDevice<T117>::move(args.queue_, t117); + MoveFromDevice<T118>::move(args.queue_, t118); + 
MoveFromDevice<T119>::move(args.queue_, t119); + MoveFromDevice<T120>::move(args.queue_, t120); + MoveFromDevice<T121>::move(args.queue_, t121); + MoveFromDevice<T122>::move(args.queue_, t122); + MoveFromDevice<T123>::move(args.queue_, t123); + MoveFromDevice<T124>::move(args.queue_, t124); + MoveFromDevice<T125>::move(args.queue_, t125); + MoveFromDevice<T126>::move(args.queue_, t126); + MoveFromDevice<T127>::move(args.queue_, t127); + + return event; + } +}; + +//------------------------------------------------------------------------------------------------------ + + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename 
T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122, + typename T123, + typename T124, + typename T125, + typename T126, + typename T127> +struct functionImplementation_ +{ +}; + +template< + typename T0> +struct functionImplementation_< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef 
std::tr1::function<Event ( + const EnqueueArgs&, + T0)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0) + { + return functor_( + enqueueArgs, + arg0); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1> +struct functionImplementation_< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1) + { + return functor_( + enqueueArgs, + arg0, + arg1); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2> +struct functionImplementation_< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3> +struct functionImplementation_< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { 
+ } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + 
arg4, + arg5, + arg6); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8> +struct functionImplementation_< 
+ T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + 
typename T21, + typename T22> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + 
typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + 
arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, 
+ T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + 
T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + 
typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + 
arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, 
+ T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType 
functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + 
typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32) + { + return functor_( + 
enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, 
+ T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + 
typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35) + { + return functor_( + 
enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + 
T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, 
+ T34, + T35, + T36, + T37, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename 
T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + 
T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 
arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + 
typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40) + { + return 
functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( 
+ const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + 
typename T39, + typename T40, + typename T41, + typename T42> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + 
arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, 
+ T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + 
typename T43, + typename T44> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + 
arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, 
+ T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename 
T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46) + { + return functor_( + enqueueArgs, + arg0, + 
arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + 
functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename 
T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + 
T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 
arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + 
typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, 
+ T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename 
T50, + typename T51> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + 
arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType 
&functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename 
T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + 
T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + 
T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54> +struct 
functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + 
arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + 
+ functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + 
typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56)> type_; + + Event operator()(const EnqueueArgs& 
enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + 
typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + 
T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, 
+ T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + 
arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + 
const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename 
T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60)> type_; + + 
Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + 
typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + 
T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, 
+ T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + 
typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + 
T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + 
T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + 
arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + 
functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + 
arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + 
T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + 
typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + 
T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + 
typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + 
T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, 
+ typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + 
T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + 
T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + 
typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + 
T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + 
T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename 
T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, 
+ T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 
arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + 
typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + 
T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 
arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + 
typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + 
T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + 
T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + 
typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, 
+ T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + 
T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + 
typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + 
T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 
arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename 
T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, 
+ T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + 
T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + 
typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + 
T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + 
T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + 
typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, 
+ T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 
arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + 
typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + 
T72, + T73, + T74, + T75, + T76, + T77, + T78)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + 
typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + 
T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, 
+ arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + 
T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + 
T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + 
arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + 
T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, 
+ arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + 
typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + 
T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + 
typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + 
T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + 
T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + 
typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ 
+ typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 
arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename 
T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + 
T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + 
arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + 
T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86) + { + 
return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + 
typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + 
T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 
arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + 
typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + 
T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 
arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename 
T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + 
T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, 
+ T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + 
arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, 
+ typename T90> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 
arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + 
typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + 
T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, 
+ T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename 
T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, 
+ T90, + T91, + T92, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, 
+ T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, 
+ arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + 
typename T90, + typename T91, + typename T92, + typename T93> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + 
T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + 
typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, 
+ T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 
arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94); + } 
+ + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, 
+ T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, 
+ T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + 
arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, 
+ typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + 
T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 
arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + 
typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + 
T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + 
arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + 
typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + NullType, + NullType, + NullType, 
+ NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 
arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + 
typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + 
T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + 
arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename 
T96, + typename T97, + typename T98, + typename T99, + typename T100> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, 
+ T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + 
typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + 
T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + 
arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + 
typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + 
T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + 
typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + 
T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + 
arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + 
typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, 
+ T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + 
T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + 
typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + 
T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, 
+ T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105) + { + return functor_( + enqueueArgs, 
+ arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + 
typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, 
+ T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 
arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + 
arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107> 
+struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, 
+ T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + 
typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + 
T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + 
T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + 
arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + 
typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, 
+ T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 
arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + 
arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, 
+ typename T109, + typename T110> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + 
NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + 
T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + 
typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, 
+ T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + 
T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111) + { + 
return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename 
T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + 
T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 
arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + 
arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + 
typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + 
T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + 
T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, 
+ arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114> +struct 
functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> 
FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, 
+ T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + 
typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + 
T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + 
T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 
arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, 
+ typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + 
T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + 
T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + 
arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, 
+ typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + 
T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, 
+ T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + 
arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + 
typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + 
T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + 
T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + 
arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + 
typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, 
+ T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 
arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + 
arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename 
T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + 
T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 
arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120); + } + 
+ operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, 
+ typename T120, + typename T121> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + NullType, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + 
+ FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 
arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, 
+ typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + 
T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + NullType, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + NullType, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + 
typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, 
+ T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121, + T122 arg122) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121, + arg122); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename 
T9, + typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122, + typename T123> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + 
T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + NullType, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + NullType, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + 
T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + 
T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121, + T122 arg122, + T123 arg123) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121, + arg122, + arg123); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + 
typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122, + typename T123, + typename T124> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + 
T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + NullType, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + NullType, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + 
T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, 
+ T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121, + T122 arg122, + T123 arg123, + T124 arg124) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121, + arg122, + arg123, + arg124); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, + typename T10, + typename T11, + 
typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122, + typename T123, + typename T124, + typename T125> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + 
T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125, + NullType, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125, + NullType, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + T0, + T1, + T2, + T3, + T4, + T5, + T6, + 
T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, 
+ T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121, + T122 arg122, + T123 arg123, + T124 arg124, + T125 arg125) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121, + arg122, + arg123, + arg124, + arg125); + } + + operator type_ () + { + return type_(*this); + } + +}; + +template< + typename T0, + typename T1, + typename T2, + typename T3, + typename T4, + typename T5, + typename T6, + typename T7, + typename T8, + typename T9, 
+ typename T10, + typename T11, + typename T12, + typename T13, + typename T14, + typename T15, + typename T16, + typename T17, + typename T18, + typename T19, + typename T20, + typename T21, + typename T22, + typename T23, + typename T24, + typename T25, + typename T26, + typename T27, + typename T28, + typename T29, + typename T30, + typename T31, + typename T32, + typename T33, + typename T34, + typename T35, + typename T36, + typename T37, + typename T38, + typename T39, + typename T40, + typename T41, + typename T42, + typename T43, + typename T44, + typename T45, + typename T46, + typename T47, + typename T48, + typename T49, + typename T50, + typename T51, + typename T52, + typename T53, + typename T54, + typename T55, + typename T56, + typename T57, + typename T58, + typename T59, + typename T60, + typename T61, + typename T62, + typename T63, + typename T64, + typename T65, + typename T66, + typename T67, + typename T68, + typename T69, + typename T70, + typename T71, + typename T72, + typename T73, + typename T74, + typename T75, + typename T76, + typename T77, + typename T78, + typename T79, + typename T80, + typename T81, + typename T82, + typename T83, + typename T84, + typename T85, + typename T86, + typename T87, + typename T88, + typename T89, + typename T90, + typename T91, + typename T92, + typename T93, + typename T94, + typename T95, + typename T96, + typename T97, + typename T98, + typename T99, + typename T100, + typename T101, + typename T102, + typename T103, + typename T104, + typename T105, + typename T106, + typename T107, + typename T108, + typename T109, + typename T110, + typename T111, + typename T112, + typename T113, + typename T114, + typename T115, + typename T116, + typename T117, + typename T118, + typename T119, + typename T120, + typename T121, + typename T122, + typename T123, + typename T124, + typename T125, + typename T126> +struct functionImplementation_< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, 
+ T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125, + T126, + NullType> +{ + typedef detail::KernelFunctorGlobal< + T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125, + T126, + NullType> FunctorType; + + FunctorType functor_; + + functionImplementation_(const FunctorType &functor) : + functor_(functor) + { + } + + typedef std::tr1::function<Event ( + const EnqueueArgs&, + 
T0, + T1, + T2, + T3, + T4, + T5, + T6, + T7, + T8, + T9, + T10, + T11, + T12, + T13, + T14, + T15, + T16, + T17, + T18, + T19, + T20, + T21, + T22, + T23, + T24, + T25, + T26, + T27, + T28, + T29, + T30, + T31, + T32, + T33, + T34, + T35, + T36, + T37, + T38, + T39, + T40, + T41, + T42, + T43, + T44, + T45, + T46, + T47, + T48, + T49, + T50, + T51, + T52, + T53, + T54, + T55, + T56, + T57, + T58, + T59, + T60, + T61, + T62, + T63, + T64, + T65, + T66, + T67, + T68, + T69, + T70, + T71, + T72, + T73, + T74, + T75, + T76, + T77, + T78, + T79, + T80, + T81, + T82, + T83, + T84, + T85, + T86, + T87, + T88, + T89, + T90, + T91, + T92, + T93, + T94, + T95, + T96, + T97, + T98, + T99, + T100, + T101, + T102, + T103, + T104, + T105, + T106, + T107, + T108, + T109, + T110, + T111, + T112, + T113, + T114, + T115, + T116, + T117, + T118, + T119, + T120, + T121, + T122, + T123, + T124, + T125, + T126)> type_; + + Event operator()(const EnqueueArgs& enqueueArgs, + T0 arg0, + T1 arg1, + T2 arg2, + T3 arg3, + T4 arg4, + T5 arg5, + T6 arg6, + T7 arg7, + T8 arg8, + T9 arg9, + T10 arg10, + T11 arg11, + T12 arg12, + T13 arg13, + T14 arg14, + T15 arg15, + T16 arg16, + T17 arg17, + T18 arg18, + T19 arg19, + T20 arg20, + T21 arg21, + T22 arg22, + T23 arg23, + T24 arg24, + T25 arg25, + T26 arg26, + T27 arg27, + T28 arg28, + T29 arg29, + T30 arg30, + T31 arg31, + T32 arg32, + T33 arg33, + T34 arg34, + T35 arg35, + T36 arg36, + T37 arg37, + T38 arg38, + T39 arg39, + T40 arg40, + T41 arg41, + T42 arg42, + T43 arg43, + T44 arg44, + T45 arg45, + T46 arg46, + T47 arg47, + T48 arg48, + T49 arg49, + T50 arg50, + T51 arg51, + T52 arg52, + T53 arg53, + T54 arg54, + T55 arg55, + T56 arg56, + T57 arg57, + T58 arg58, + T59 arg59, + T60 arg60, + T61 arg61, + T62 arg62, + T63 arg63, + T64 arg64, + T65 arg65, + T66 arg66, + T67 arg67, + T68 arg68, + T69 arg69, + T70 arg70, + T71 arg71, + T72 arg72, + T73 arg73, + T74 arg74, + T75 arg75, + T76 arg76, + T77 arg77, + T78 arg78, + T79 arg79, + T80 arg80, + 
T81 arg81, + T82 arg82, + T83 arg83, + T84 arg84, + T85 arg85, + T86 arg86, + T87 arg87, + T88 arg88, + T89 arg89, + T90 arg90, + T91 arg91, + T92 arg92, + T93 arg93, + T94 arg94, + T95 arg95, + T96 arg96, + T97 arg97, + T98 arg98, + T99 arg99, + T100 arg100, + T101 arg101, + T102 arg102, + T103 arg103, + T104 arg104, + T105 arg105, + T106 arg106, + T107 arg107, + T108 arg108, + T109 arg109, + T110 arg110, + T111 arg111, + T112 arg112, + T113 arg113, + T114 arg114, + T115 arg115, + T116 arg116, + T117 arg117, + T118 arg118, + T119 arg119, + T120 arg120, + T121 arg121, + T122 arg122, + T123 arg123, + T124 arg124, + T125 arg125, + T126 arg126) + { + return functor_( + enqueueArgs, + arg0, + arg1, + arg2, + arg3, + arg4, + arg5, + arg6, + arg7, + arg8, + arg9, + arg10, + arg11, + arg12, + arg13, + arg14, + arg15, + arg16, + arg17, + arg18, + arg19, + arg20, + arg21, + arg22, + arg23, + arg24, + arg25, + arg26, + arg27, + arg28, + arg29, + arg30, + arg31, + arg32, + arg33, + arg34, + arg35, + arg36, + arg37, + arg38, + arg39, + arg40, + arg41, + arg42, + arg43, + arg44, + arg45, + arg46, + arg47, + arg48, + arg49, + arg50, + arg51, + arg52, + arg53, + arg54, + arg55, + arg56, + arg57, + arg58, + arg59, + arg60, + arg61, + arg62, + arg63, + arg64, + arg65, + arg66, + arg67, + arg68, + arg69, + arg70, + arg71, + arg72, + arg73, + arg74, + arg75, + arg76, + arg77, + arg78, + arg79, + arg80, + arg81, + arg82, + arg83, + arg84, + arg85, + arg86, + arg87, + arg88, + arg89, + arg90, + arg91, + arg92, + arg93, + arg94, + arg95, + arg96, + arg97, + arg98, + arg99, + arg100, + arg101, + arg102, + arg103, + arg104, + arg105, + arg106, + arg107, + arg108, + arg109, + arg110, + arg111, + arg112, + arg113, + arg114, + arg115, + arg116, + arg117, + arg118, + arg119, + arg120, + arg121, + arg122, + arg123, + arg124, + arg125, + arg126); + } + + operator type_ () + { + return type_(*this); + } + +}; + + + + + +} // namespace detail + 
+//---------------------------------------------------------------------------------------------- + +template < + typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, + typename T3 = detail::NullType, typename T4 = detail::NullType, + typename T5 = detail::NullType, typename T6 = detail::NullType, + typename T7 = detail::NullType, typename T8 = detail::NullType, + typename T9 = detail::NullType, typename T10 = detail::NullType, + typename T11 = detail::NullType, typename T12 = detail::NullType, + typename T13 = detail::NullType, typename T14 = detail::NullType, + typename T15 = detail::NullType, typename T16 = detail::NullType, + typename T17 = detail::NullType, typename T18 = detail::NullType, + typename T19 = detail::NullType, typename T20 = detail::NullType, + typename T21 = detail::NullType, typename T22 = detail::NullType, + typename T23 = detail::NullType, typename T24 = detail::NullType, + typename T25 = detail::NullType, typename T26 = detail::NullType, + typename T27 = detail::NullType, typename T28 = detail::NullType, + typename T29 = detail::NullType, typename T30 = detail::NullType, + typename T31 = detail::NullType, typename T32 = detail::NullType, + typename T33 = detail::NullType, typename T34 = detail::NullType, + typename T35 = detail::NullType, typename T36 = detail::NullType, + typename T37 = detail::NullType, typename T38 = detail::NullType, + typename T39 = detail::NullType, typename T40 = detail::NullType, + typename T41 = detail::NullType, typename T42 = detail::NullType, + typename T43 = detail::NullType, typename T44 = detail::NullType, + typename T45 = detail::NullType, typename T46 = detail::NullType, + typename T47 = detail::NullType, typename T48 = detail::NullType, + typename T49 = detail::NullType, typename T50 = detail::NullType, + typename T51 = detail::NullType, typename T52 = detail::NullType, + typename T53 = detail::NullType, typename T54 = detail::NullType, + typename T55 = detail::NullType, typename T56 
= detail::NullType, + typename T57 = detail::NullType, typename T58 = detail::NullType, + typename T59 = detail::NullType, typename T60 = detail::NullType, + typename T61 = detail::NullType, typename T62 = detail::NullType, + typename T63 = detail::NullType, typename T64 = detail::NullType, + typename T65 = detail::NullType, typename T66 = detail::NullType, + typename T67 = detail::NullType, typename T68 = detail::NullType, + typename T69 = detail::NullType, typename T70 = detail::NullType, + typename T71 = detail::NullType, typename T72 = detail::NullType, + typename T73 = detail::NullType, typename T74 = detail::NullType, + typename T75 = detail::NullType, typename T76 = detail::NullType, + typename T77 = detail::NullType, typename T78 = detail::NullType, + typename T79 = detail::NullType, typename T80 = detail::NullType, + typename T81 = detail::NullType, typename T82 = detail::NullType, + typename T83 = detail::NullType, typename T84 = detail::NullType, + typename T85 = detail::NullType, typename T86 = detail::NullType, + typename T87 = detail::NullType, typename T88 = detail::NullType, + typename T89 = detail::NullType, typename T90 = detail::NullType, + typename T91 = detail::NullType, typename T92 = detail::NullType, + typename T93 = detail::NullType, typename T94 = detail::NullType, + typename T95 = detail::NullType, typename T96 = detail::NullType, + typename T97 = detail::NullType, typename T98 = detail::NullType, + typename T99 = detail::NullType, typename T100 = detail::NullType, + typename T101 = detail::NullType, typename T102 = detail::NullType, + typename T103 = detail::NullType, typename T104 = detail::NullType, + typename T105 = detail::NullType, typename T106 = detail::NullType, + typename T107 = detail::NullType, typename T108 = detail::NullType, + typename T109 = detail::NullType, typename T110 = detail::NullType, + typename T111 = detail::NullType, typename T112 = detail::NullType, + typename T113 = detail::NullType, typename T114 = 
detail::NullType, + typename T115 = detail::NullType, typename T116 = detail::NullType, + typename T117 = detail::NullType, typename T118 = detail::NullType, + typename T119 = detail::NullType, typename T120 = detail::NullType, + typename T121 = detail::NullType, typename T122 = detail::NullType, + typename T123 = detail::NullType, typename T124 = detail::NullType, + typename T125 = detail::NullType, typename T126 = detail::NullType, + typename T127 = detail::NullType> +struct make_kernel : + public detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31, + T32, T33, T34, T35, + T36, T37, T38, T39, + T40, T41, T42, T43, + T44, T45, T46, T47, + T48, T49, T50, T51, + T52, T53, T54, T55, + T56, T57, T58, T59, + T60, T61, T62, T63, + T64, T65, T66, T67, + T68, T69, T70, T71, + T72, T73, T74, T75, + T76, T77, T78, T79, + T80, T81, T82, T83, + T84, T85, T86, T87, + T88, T89, T90, T91, + T92, T93, T94, T95, + T96, T97, T98, T99, + T100, T101, T102, T103, + T104, T105, T106, T107, + T108, T109, T110, T111, + T112, T113, T114, T115, + T116, T117, T118, T119, + T120, T121, T122, T123, + T124, T125, T126, T127> +{ +public: + typedef detail::KernelFunctorGlobal< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31, + T32, T33, T34, T35, + T36, T37, T38, T39, + T40, T41, T42, T43, + T44, T45, T46, T47, + T48, T49, T50, T51, + T52, T53, T54, T55, + T56, T57, T58, T59, + T60, T61, T62, T63, + T64, T65, T66, T67, + T68, T69, T70, T71, + T72, T73, T74, T75, + T76, T77, T78, T79, + T80, T81, T82, T83, + T84, T85, T86, T87, + T88, T89, T90, T91, + T92, T93, T94, T95, + T96, T97, T98, T99, + T100, T101, T102, T103, + T104, T105, T106, T107, + T108, T109, T110, T111, + T112, T113, T114, T115, + T116, T117, T118, T119, + T120, T121, 
T122, T123, + T124, T125, T126, T127> FunctorType; + + make_kernel( + const Program& program, + const STRING_CLASS name, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31, + T32, T33, T34, T35, + T36, T37, T38, T39, + T40, T41, T42, T43, + T44, T45, T46, T47, + T48, T49, T50, T51, + T52, T53, T54, T55, + T56, T57, T58, T59, + T60, T61, T62, T63, + T64, T65, T66, T67, + T68, T69, T70, T71, + T72, T73, T74, T75, + T76, T77, T78, T79, + T80, T81, T82, T83, + T84, T85, T86, T87, + T88, T89, T90, T91, + T92, T93, T94, T95, + T96, T97, T98, T99, + T100, T101, T102, T103, + T104, T105, T106, T107, + T108, T109, T110, T111, + T112, T113, T114, T115, + T116, T117, T118, T119, + T120, T121, T122, T123, + T124, T125, T126, T127>( + FunctorType(program, name, err)) + {} + + make_kernel( + const Kernel kernel, + cl_int * err = NULL) : + detail::functionImplementation_< + T0, T1, T2, T3, + T4, T5, T6, T7, + T8, T9, T10, T11, + T12, T13, T14, T15, + T16, T17, T18, T19, + T20, T21, T22, T23, + T24, T25, T26, T27, + T28, T29, T30, T31, + T32, T33, T34, T35, + T36, T37, T38, T39, + T40, T41, T42, T43, + T44, T45, T46, T47, + T48, T49, T50, T51, + T52, T53, T54, T55, + T56, T57, T58, T59, + T60, T61, T62, T63, + T64, T65, T66, T67, + T68, T69, T70, T71, + T72, T73, T74, T75, + T76, T77, T78, T79, + T80, T81, T82, T83, + T84, T85, T86, T87, + T88, T89, T90, T91, + T92, T93, T94, T95, + T96, T97, T98, T99, + T100, T101, T102, T103, + T104, T105, T106, T107, + T108, T109, T110, T111, + T112, T113, T114, T115, + T116, T117, T118, T119, + T120, T121, T122, T123, + T124, T125, T126, T127>( + FunctorType(kernel, err)) + {} +}; + +#endif + +//---------------------------------------------------------------------------------------------------------------------- + +#undef __ERR_STR +#if 
!defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef __GET_DEVICE_INFO_ERR +#undef __GET_PLATFORM_INFO_ERR +#undef __GET_DEVICE_IDS_ERR +#undef __GET_CONTEXT_INFO_ERR +#undef __GET_EVENT_INFO_ERR +#undef __GET_EVENT_PROFILE_INFO_ERR +#undef __GET_MEM_OBJECT_INFO_ERR +#undef __GET_IMAGE_INFO_ERR +#undef __GET_SAMPLER_INFO_ERR +#undef __GET_KERNEL_INFO_ERR +#undef __GET_KERNEL_WORK_GROUP_INFO_ERR +#undef __GET_PROGRAM_INFO_ERR +#undef __GET_PROGRAM_BUILD_INFO_ERR +#undef __GET_COMMAND_QUEUE_INFO_ERR + +#undef __CREATE_CONTEXT_ERR +#undef __CREATE_CONTEXT_FROM_TYPE_ERR +#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR + +#undef __CREATE_BUFFER_ERR +#undef __CREATE_SUBBUFFER_ERR +#undef __CREATE_IMAGE2D_ERR +#undef __CREATE_IMAGE3D_ERR +#undef __CREATE_SAMPLER_ERR +#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR + +#undef __CREATE_USER_EVENT_ERR +#undef __SET_USER_EVENT_STATUS_ERR +#undef __SET_EVENT_CALLBACK_ERR + +#undef __WAIT_FOR_EVENTS_ERR + +#undef __CREATE_KERNEL_ERR +#undef __SET_KERNEL_ARGS_ERR +#undef __CREATE_PROGRAM_WITH_SOURCE_ERR +#undef __CREATE_PROGRAM_WITH_BINARY_ERR +#undef __BUILD_PROGRAM_ERR +#undef __CREATE_KERNELS_IN_PROGRAM_ERR + +#undef __CREATE_COMMAND_QUEUE_ERR +#undef __SET_COMMAND_QUEUE_PROPERTY_ERR +#undef __ENQUEUE_READ_BUFFER_ERR +#undef __ENQUEUE_WRITE_BUFFER_ERR +#undef __ENQUEUE_READ_BUFFER_RECT_ERR +#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +#undef __ENQEUE_COPY_BUFFER_ERR +#undef __ENQEUE_COPY_BUFFER_RECT_ERR +#undef __ENQUEUE_READ_IMAGE_ERR +#undef __ENQUEUE_WRITE_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_ERR +#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +#undef __ENQUEUE_MAP_BUFFER_ERR +#undef __ENQUEUE_MAP_IMAGE_ERR +#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR +#undef __ENQUEUE_NDRANGE_KERNEL_ERR +#undef __ENQUEUE_TASK_ERR +#undef __ENQUEUE_NATIVE_KERNEL + +#undef __UNLOAD_COMPILER_ERR +#endif //__CL_USER_OVERRIDE_ERROR_STRINGS + +#undef __GET_INFO_HELPER_WITH_RETAIN + +// Extensions +#undef __INIT_CL_EXT_FCN_PTR 
+#undef __CREATE_SUB_DEVICES + +#if defined(USE_CL_DEVICE_FISSION) +#undef __PARAM_NAME_DEVICE_FISSION +#endif // USE_CL_DEVICE_FISSION + +#undef __DEFAULT_NOT_INITIALIZED +#undef __DEFAULT_BEING_INITIALIZED +#undef __DEFAULT_INITIALIZED + +} // namespace cl + +#ifdef _WIN32 +#pragma pop_macro("max") +#endif // _WIN32 + +#endif // CL_HPP_ diff --git a/RTCP/GPUProc/src/CMakeLists.txt b/RTCP/GPUProc/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..312eb25b5c33cca15189f10ff94fb3897e709db8 --- /dev/null +++ b/RTCP/GPUProc/src/CMakeLists.txt @@ -0,0 +1,32 @@ +# $Id: CMakeLists.txt 17003 2011-01-06 08:54:59Z romein $ + +include(LofarPackageVersion) + +# Create symbolic link to include directory. +#execute_process(COMMAND ${CMAKE_COMMAND} -E create_symlink +# ${CMAKE_CURRENT_SOURCE_DIR} +# ${CMAKE_BINARY_DIR}/include/${PACKAGE_NAME}) + +# Add current source directory to -I path. This is needed because GPUProc uses +# angle brackets for internal header files, instead of quotes. 
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +set(gpuproc_LIB_SRCS + #Package__Version.cc + BandPass.cc + BeamletBuffer.cc + Delays.cc + FilterBank.cc + InputSection.cc + InputThread.cc + LogThread.cc + OpenCL_Support.cc + ReaderWriterSynchronization.cc + RTCP.cc + UHEP/InvertedStationPPFWeights.cc) + +lofar_add_library(gpuproc ${gpuproc_LIB_SRCS}) + +lofar_add_bin_program(RTCP RTCP.cc) +#lofar_add_bin_program(versiongpuproc versiongpuproc.cc) + diff --git a/RTCP/GPUProc/src/Correlator.cl b/RTCP/GPUProc/src/Correlator.cl new file mode 100644 index 0000000000000000000000000000000000000000..9ca1ce91620cc5927211907be030ff33626164ce --- /dev/null +++ b/RTCP/GPUProc/src/Correlator.cl @@ -0,0 +1,567 @@ +#define NR_BASELINES (NR_STATIONS * (NR_STATIONS + 1) / 2) + +#if NR_STATIONS == 288 +#define BLOCK_SIZE 8 +#elif defined NVIDIA_CUDA && NR_SAMPLES_PER_CHANNEL % 24 == 0 +#define BLOCK_SIZE 24 +#else +#define BLOCK_SIZE 16 +#endif + +typedef __global float4 (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL]; +typedef __global float8 (*VisibilitiesType)[NR_BASELINES][NR_CHANNELS]; + + +//#pragma OPENCL EXTENSION cl_intel_printf : enable + +__kernel void correlate(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float samples[4][BLOCK_SIZE][NR_STATIONS | 1]; // avoid power-of-2 + + uint baseline = get_global_id(0); + uint channel = get_global_id(1); + uint stat_0 = convert_uint_rtz(sqrt(convert_float(8 * baseline + 1)) - 0.99999f) / 2; + uint stat_A = baseline - stat_0 * (stat_0 + 1) / 2; + + float4 visR = (float4) 0, visI = (float4) 0; + + for (uint major = 0; major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory + for (uint i = get_local_id(0); i < BLOCK_SIZE * NR_STATIONS; i += get_local_size(0)) { + uint time = i % BLOCK_SIZE; + 
uint stat = i / BLOCK_SIZE; + + float4 sample = (*correctedData)[stat][channel][major + time]; + + samples[0][time][stat] = sample.x; + samples[1][time][stat] = sample.y; + samples[2][time][stat] = sample.z; + samples[3][time][stat] = sample.w; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // compute correlations + if (baseline < NR_BASELINES) { + for (uint time = 0; time < BLOCK_SIZE; time ++) { + float4 sample_1, sample_A; + sample_1.x = samples[0][time][stat_0]; + sample_1.y = samples[1][time][stat_0]; + sample_1.z = samples[2][time][stat_0]; + sample_1.w = samples[3][time][stat_0]; + sample_A.x = samples[0][time][stat_A]; + sample_A.y = samples[1][time][stat_A]; + sample_A.z = samples[2][time][stat_A]; + sample_A.w = samples[3][time][stat_A]; + + visR += sample_1.xxzz * sample_A.xzxz; + visI += sample_1.yyww * sample_A.xzxz; + visR += sample_1.yyww * sample_A.ywyw; + visI -= sample_1.xxzz * sample_A.ywyw; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + // write visibilities + if (baseline < NR_BASELINES) + (*visibilities)[baseline][channel] = (float8) { visR.x, visI.x, visR.y, visI.y, visR.z, visI.z, visR.w, visI.w }; +} + + +__kernel void correlate_2x2(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float4 samples[2][BLOCK_SIZE][(NR_STATIONS + 1) / 2 | 1]; // avoid power-of-2 + + uint channel = get_global_id(1); + uint block = get_global_id(0); + + uint x = convert_uint_rtz(sqrt(convert_float(8 * block + 1)) - 0.99999f) / 2; + uint y = block - x * (x + 1) / 2; + + uint stat_A = 2 * x; + + bool compute_correlations = stat_A < NR_STATIONS; + + float4 vis_0A_r = (float4) 0, vis_0A_i = (float4) 0; + float4 vis_0B_r = (float4) 0, vis_0B_i = (float4) 0; + float4 vis_1A_r = (float4) 0, vis_1A_i = (float4) 0; + float4 vis_1B_r = (float4) 0, vis_1B_i = (float4) 0; + + for (uint major = 0; 
major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory +#pragma unroll 1 + for (uint i = get_local_id(0); i < BLOCK_SIZE * NR_STATIONS; i += get_local_size(0)) { + uint time = i % BLOCK_SIZE; + uint stat = i / BLOCK_SIZE; + + samples[stat & 1][time][stat / 2] = (*correctedData)[stat][channel][major + time]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (compute_correlations) { + for (uint time = 0; time < BLOCK_SIZE; time ++) { + float4 sample_0 = samples[0][time][y]; + float4 sample_A = samples[0][time][x]; + float4 sample_B = samples[1][time][x]; + float4 sample_1 = samples[1][time][y]; + + vis_0A_r += sample_0.xxzz * sample_A.xzxz; + vis_0A_i += sample_0.yyww * sample_A.xzxz; + vis_0B_r += sample_0.xxzz * sample_B.xzxz; + vis_0B_i += sample_0.yyww * sample_B.xzxz; + vis_1A_r += sample_1.xxzz * sample_A.xzxz; + vis_1A_i += sample_1.yyww * sample_A.xzxz; + vis_1B_r += sample_1.xxzz * sample_B.xzxz; + vis_1B_i += sample_1.yyww * sample_B.xzxz; + + vis_0A_r += sample_0.yyww * sample_A.ywyw; + vis_0A_i -= sample_0.xxzz * sample_A.ywyw; + vis_0B_r += sample_0.yyww * sample_B.ywyw; + vis_0B_i -= sample_0.xxzz * sample_B.ywyw; + vis_1A_r += sample_1.yyww * sample_A.ywyw; + vis_1A_i -= sample_1.xxzz * sample_A.ywyw; + vis_1B_r += sample_1.yyww * sample_B.ywyw; + vis_1B_i -= sample_1.xxzz * sample_B.ywyw; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + // write visibilities + uint stat_0 = 2 * y; + uint stat_1 = stat_0 + 1; + uint stat_B = stat_A + 1; + bool do_baseline_0A = stat_A < NR_STATIONS; + bool do_baseline_0B = stat_B < NR_STATIONS; + bool do_baseline_1A = do_baseline_0A && stat_1 <= stat_A; + bool do_baseline_1B = do_baseline_0B; + + if (do_baseline_0A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0A_r.x, vis_0A_i.x, vis_0A_r.y, vis_0A_i.y, vis_0A_r.z, vis_0A_i.z, vis_0A_r.w, vis_0A_i.w }; + } + + if (do_baseline_0B) { + uint baseline = (stat_B * (stat_B + 1) / 
2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0B_r.x, vis_0B_i.x, vis_0B_r.y, vis_0B_i.y, vis_0B_r.z, vis_0B_i.z, vis_0B_r.w, vis_0B_i.w }; + } + + if (do_baseline_1A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1A_r.x, vis_1A_i.x, vis_1A_r.y, vis_1A_i.y, vis_1A_r.z, vis_1A_i.z, vis_1A_r.w, vis_1A_i.w }; + } + + if (do_baseline_1B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1B_r.x, vis_1B_i.x, vis_1B_r.y, vis_1B_i.y, vis_1B_r.z, vis_1B_i.z, vis_1B_r.w, vis_1B_i.w }; + } +} + + +__kernel void correlate_3x3(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float4 samples[3][BLOCK_SIZE][(NR_STATIONS + 2) / 3 | 1]; // avoid power-of-2 + + uint channel = get_global_id(1); + uint block = get_global_id(0); + + uint x = convert_uint_rtz(sqrt(convert_float(8 * block + 1)) - 0.99999f) / 2; + uint y = block - x * (x + 1) / 2; + + uint stat_A = 3 * x; + + bool compute_correlations = stat_A < NR_STATIONS; + + float4 vis_0A_r = (float4) 0, vis_0A_i = (float4) 0; + float4 vis_0B_r = (float4) 0, vis_0B_i = (float4) 0; + float4 vis_0C_r = (float4) 0, vis_0C_i = (float4) 0; + float4 vis_1A_r = (float4) 0, vis_1A_i = (float4) 0; + float4 vis_1B_r = (float4) 0, vis_1B_i = (float4) 0; + float4 vis_1C_r = (float4) 0, vis_1C_i = (float4) 0; + float4 vis_2A_r = (float4) 0, vis_2A_i = (float4) 0; + float4 vis_2B_r = (float4) 0, vis_2B_i = (float4) 0; + float4 vis_2C_r = (float4) 0, vis_2C_i = (float4) 0; + + for (uint major = 0; major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory +#pragma unroll 1 + for (uint i = get_local_id(0); i < BLOCK_SIZE * NR_STATIONS; i += get_local_size(0)) { + uint time = i % BLOCK_SIZE; + 
uint stat = i / BLOCK_SIZE; + + samples[stat % 3][time][stat / 3] = (*correctedData)[stat][channel][major + time]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (compute_correlations) { + for (uint time = 0; time < BLOCK_SIZE; time ++) { + float4 sample_0 = samples[0][time][y]; + float4 sample_A = samples[0][time][x]; + float4 sample_B = samples[1][time][x]; + float4 sample_C = samples[2][time][x]; + float4 sample_1 = samples[1][time][y]; + float4 sample_2 = samples[2][time][y]; + + vis_0A_r += sample_0.xxzz * sample_A.xzxz; + vis_0A_i += sample_0.yyww * sample_A.xzxz; + vis_0B_r += sample_0.xxzz * sample_B.xzxz; + vis_0B_i += sample_0.yyww * sample_B.xzxz; + vis_0C_r += sample_0.xxzz * sample_C.xzxz; + vis_0C_i += sample_0.yyww * sample_C.xzxz; + vis_1A_r += sample_1.xxzz * sample_A.xzxz; + vis_1A_i += sample_1.yyww * sample_A.xzxz; + vis_1B_r += sample_1.xxzz * sample_B.xzxz; + vis_1B_i += sample_1.yyww * sample_B.xzxz; + vis_1C_r += sample_1.xxzz * sample_C.xzxz; + vis_1C_i += sample_1.yyww * sample_C.xzxz; + vis_2A_r += sample_2.xxzz * sample_A.xzxz; + vis_2A_i += sample_2.yyww * sample_A.xzxz; + vis_2B_r += sample_2.xxzz * sample_B.xzxz; + vis_2B_i += sample_2.yyww * sample_B.xzxz; + vis_2C_r += sample_2.xxzz * sample_C.xzxz; + vis_2C_i += sample_2.yyww * sample_C.xzxz; + + vis_0A_r += sample_0.yyww * sample_A.ywyw; + vis_0A_i -= sample_0.xxzz * sample_A.ywyw; + vis_0B_r += sample_0.yyww * sample_B.ywyw; + vis_0B_i -= sample_0.xxzz * sample_B.ywyw; + vis_0C_r += sample_0.yyww * sample_C.ywyw; + vis_0C_i -= sample_0.xxzz * sample_C.ywyw; + vis_1A_r += sample_1.yyww * sample_A.ywyw; + vis_1A_i -= sample_1.xxzz * sample_A.ywyw; + vis_1B_r += sample_1.yyww * sample_B.ywyw; + vis_1B_i -= sample_1.xxzz * sample_B.ywyw; + vis_1C_r += sample_1.yyww * sample_C.ywyw; + vis_1C_i -= sample_1.xxzz * sample_C.ywyw; + vis_2A_r += sample_2.yyww * sample_A.ywyw; + vis_2A_i -= sample_2.xxzz * sample_A.ywyw; + vis_2B_r += sample_2.yyww * sample_B.ywyw; + vis_2B_i -= sample_2.xxzz 
* sample_B.ywyw; + vis_2C_r += sample_2.yyww * sample_C.ywyw; + vis_2C_i -= sample_2.xxzz * sample_C.ywyw; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + // write visibilities + uint stat_0 = 3 * y; + uint stat_1 = stat_0 + 1; + uint stat_2 = stat_0 + 2; + uint stat_B = stat_A + 1; + uint stat_C = stat_A + 2; + + bool do_baseline_0A = stat_0 < NR_STATIONS && stat_A < NR_STATIONS && stat_0 <= stat_A; + bool do_baseline_0B = stat_0 < NR_STATIONS && stat_B < NR_STATIONS && stat_0 <= stat_B; + bool do_baseline_0C = stat_0 < NR_STATIONS && stat_C < NR_STATIONS && stat_0 <= stat_C; + bool do_baseline_1A = stat_1 < NR_STATIONS && stat_A < NR_STATIONS && stat_1 <= stat_A; + bool do_baseline_1B = stat_1 < NR_STATIONS && stat_B < NR_STATIONS && stat_1 <= stat_B; + bool do_baseline_1C = stat_1 < NR_STATIONS && stat_C < NR_STATIONS && stat_1 <= stat_C; + bool do_baseline_2A = stat_2 < NR_STATIONS && stat_A < NR_STATIONS && stat_2 <= stat_A; + bool do_baseline_2B = stat_2 < NR_STATIONS && stat_B < NR_STATIONS && stat_2 <= stat_B; + bool do_baseline_2C = stat_2 < NR_STATIONS && stat_C < NR_STATIONS && stat_2 <= stat_C; + + if (do_baseline_0A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0A_r.x, vis_0A_i.x, vis_0A_r.y, vis_0A_i.y, vis_0A_r.z, vis_0A_i.z, vis_0A_r.w, vis_0A_i.w }; + } + + if (do_baseline_0B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0B_r.x, vis_0B_i.x, vis_0B_r.y, vis_0B_i.y, vis_0B_r.z, vis_0B_i.z, vis_0B_r.w, vis_0B_i.w }; + } + + if (do_baseline_0C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0C_r.x, vis_0C_i.x, vis_0C_r.y, vis_0C_i.y, vis_0C_r.z, vis_0C_i.z, vis_0C_r.w, vis_0C_i.w }; + } + + if (do_baseline_1A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1A_r.x, vis_1A_i.x, vis_1A_r.y, 
vis_1A_i.y, vis_1A_r.z, vis_1A_i.z, vis_1A_r.w, vis_1A_i.w }; + } + + if (do_baseline_1B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1B_r.x, vis_1B_i.x, vis_1B_r.y, vis_1B_i.y, vis_1B_r.z, vis_1B_i.z, vis_1B_r.w, vis_1B_i.w }; + } + + if (do_baseline_1C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1C_r.x, vis_1C_i.x, vis_1C_r.y, vis_1C_i.y, vis_1C_r.z, vis_1C_i.z, vis_1C_r.w, vis_1C_i.w }; + } + + if (do_baseline_2A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2A_r.x, vis_2A_i.x, vis_2A_r.y, vis_2A_i.y, vis_2A_r.z, vis_2A_i.z, vis_2A_r.w, vis_2A_i.w }; + } + + if (do_baseline_2B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2B_r.x, vis_2B_i.x, vis_2B_r.y, vis_2B_i.y, vis_2B_r.z, vis_2B_i.z, vis_2B_r.w, vis_2B_i.w }; + } + + if (do_baseline_2C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2C_r.x, vis_2C_i.x, vis_2C_r.y, vis_2C_i.y, vis_2C_r.z, vis_2C_i.z, vis_2C_r.w, vis_2C_i.w }; + } +} + + +__kernel void correlate_4x4(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float4 samples[4][BLOCK_SIZE][(NR_STATIONS + 3) / 4 | 1]; // avoid power-of-2 + + uint channel = get_global_id(1); + uint block = get_global_id(0); + + uint x = convert_uint_rtz(sqrt(convert_float(8 * block + 1)) - 0.99999f) / 2; + uint y = block - x * (x + 1) / 2; + + uint stat_A = 4 * x; + + bool compute_correlations = stat_A < NR_STATIONS; + + float4 vis_0A_r = (float4) 0, vis_0A_i = (float4) 0; + float4 vis_0B_r = (float4) 0, vis_0B_i = (float4) 0; + float4 vis_0C_r = (float4) 0, 
vis_0C_i = (float4) 0; + float4 vis_0D_r = (float4) 0, vis_0D_i = (float4) 0; + float4 vis_1A_r = (float4) 0, vis_1A_i = (float4) 0; + float4 vis_1B_r = (float4) 0, vis_1B_i = (float4) 0; + float4 vis_1C_r = (float4) 0, vis_1C_i = (float4) 0; + float4 vis_1D_r = (float4) 0, vis_1D_i = (float4) 0; + float4 vis_2A_r = (float4) 0, vis_2A_i = (float4) 0; + float4 vis_2B_r = (float4) 0, vis_2B_i = (float4) 0; + float4 vis_2C_r = (float4) 0, vis_2C_i = (float4) 0; + float4 vis_2D_r = (float4) 0, vis_2D_i = (float4) 0; + float4 vis_3A_r = (float4) 0, vis_3A_i = (float4) 0; + float4 vis_3B_r = (float4) 0, vis_3B_i = (float4) 0; + float4 vis_3C_r = (float4) 0, vis_3C_i = (float4) 0; + float4 vis_3D_r = (float4) 0, vis_3D_i = (float4) 0; + + for (uint major = 0; major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory +#pragma unroll 1 + for (uint i = get_local_id(0); i < BLOCK_SIZE * NR_STATIONS; i += get_local_size(0)) { + uint time = i % BLOCK_SIZE; + uint stat = i / BLOCK_SIZE; + + samples[stat % 4][time][stat / 4] = (*correctedData)[stat][channel][major + time]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (compute_correlations) { + for (uint time = 0; time < BLOCK_SIZE; time ++) { + float4 sample_0 = samples[0][time][y]; + float4 sample_A = samples[0][time][x]; + float4 sample_B = samples[1][time][x]; + float4 sample_C = samples[2][time][x]; + float4 sample_D = samples[3][time][x]; + float4 sample_1 = samples[1][time][y]; + float4 sample_2 = samples[2][time][y]; + float4 sample_3 = samples[3][time][y]; + + vis_0A_r += sample_0.xxzz * sample_A.xzxz; + vis_0A_i += sample_0.yyww * sample_A.xzxz; + vis_0B_r += sample_0.xxzz * sample_B.xzxz; + vis_0B_i += sample_0.yyww * sample_B.xzxz; + vis_0C_r += sample_0.xxzz * sample_C.xzxz; + vis_0C_i += sample_0.yyww * sample_C.xzxz; + vis_0D_r += sample_0.xxzz * sample_D.xzxz; + vis_0D_i += sample_0.yyww * sample_D.xzxz; + vis_1A_r += sample_1.xxzz * sample_A.xzxz; + vis_1A_i += sample_1.yyww * 
sample_A.xzxz; + vis_1B_r += sample_1.xxzz * sample_B.xzxz; + vis_1B_i += sample_1.yyww * sample_B.xzxz; + vis_1C_r += sample_1.xxzz * sample_C.xzxz; + vis_1C_i += sample_1.yyww * sample_C.xzxz; + vis_1D_r += sample_1.xxzz * sample_D.xzxz; + vis_1D_i += sample_1.yyww * sample_D.xzxz; + vis_2A_r += sample_2.xxzz * sample_A.xzxz; + vis_2A_i += sample_2.yyww * sample_A.xzxz; + vis_2B_r += sample_2.xxzz * sample_B.xzxz; + vis_2B_i += sample_2.yyww * sample_B.xzxz; + vis_2C_r += sample_2.xxzz * sample_C.xzxz; + vis_2C_i += sample_2.yyww * sample_C.xzxz; + vis_2D_r += sample_2.xxzz * sample_D.xzxz; + vis_2D_i += sample_2.yyww * sample_D.xzxz; + vis_3A_r += sample_3.xxzz * sample_A.xzxz; + vis_3A_i += sample_3.yyww * sample_A.xzxz; + vis_3B_r += sample_3.xxzz * sample_B.xzxz; + vis_3B_i += sample_3.yyww * sample_B.xzxz; + vis_3C_r += sample_3.xxzz * sample_C.xzxz; + vis_3C_i += sample_3.yyww * sample_C.xzxz; + vis_3D_r += sample_3.xxzz * sample_D.xzxz; + vis_3D_i += sample_3.yyww * sample_D.xzxz; + + vis_0A_r += sample_0.yyww * sample_A.ywyw; + vis_0A_i -= sample_0.xxzz * sample_A.ywyw; + vis_0B_r += sample_0.yyww * sample_B.ywyw; + vis_0B_i -= sample_0.xxzz * sample_B.ywyw; + vis_0C_r += sample_0.yyww * sample_C.ywyw; + vis_0C_i -= sample_0.xxzz * sample_C.ywyw; + vis_0D_r += sample_0.yyww * sample_D.ywyw; + vis_0D_i -= sample_0.xxzz * sample_D.ywyw; + vis_1A_r += sample_1.yyww * sample_A.ywyw; + vis_1A_i -= sample_1.xxzz * sample_A.ywyw; + vis_1B_r += sample_1.yyww * sample_B.ywyw; + vis_1B_i -= sample_1.xxzz * sample_B.ywyw; + vis_1C_r += sample_1.yyww * sample_C.ywyw; + vis_1C_i -= sample_1.xxzz * sample_C.ywyw; + vis_1D_r += sample_1.yyww * sample_D.ywyw; + vis_1D_i -= sample_1.xxzz * sample_D.ywyw; + vis_2A_r += sample_2.yyww * sample_A.ywyw; + vis_2A_i -= sample_2.xxzz * sample_A.ywyw; + vis_2B_r += sample_2.yyww * sample_B.ywyw; + vis_2B_i -= sample_2.xxzz * sample_B.ywyw; + vis_2C_r += sample_2.yyww * sample_C.ywyw; + vis_2C_i -= sample_2.xxzz * sample_C.ywyw; + 
vis_2D_r += sample_2.yyww * sample_D.ywyw; + vis_2D_i -= sample_2.xxzz * sample_D.ywyw; + vis_3A_r += sample_3.yyww * sample_A.ywyw; + vis_3A_i -= sample_3.xxzz * sample_A.ywyw; + vis_3B_r += sample_3.yyww * sample_B.ywyw; + vis_3B_i -= sample_3.xxzz * sample_B.ywyw; + vis_3C_r += sample_3.yyww * sample_C.ywyw; + vis_3C_i -= sample_3.xxzz * sample_C.ywyw; + vis_3D_r += sample_3.yyww * sample_D.ywyw; + vis_3D_i -= sample_3.xxzz * sample_D.ywyw; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + // write visibilities + uint stat_0 = 4 * y; + uint stat_1 = stat_0 + 1; + uint stat_2 = stat_0 + 2; + uint stat_3 = stat_0 + 3; + uint stat_B = stat_A + 1; + uint stat_C = stat_A + 2; + uint stat_D = stat_A + 3; + + bool do_baseline_0A = stat_0 < NR_STATIONS && stat_A < NR_STATIONS && stat_0 <= stat_A; + bool do_baseline_0B = stat_0 < NR_STATIONS && stat_B < NR_STATIONS && stat_0 <= stat_B; + bool do_baseline_0C = stat_0 < NR_STATIONS && stat_C < NR_STATIONS && stat_0 <= stat_C; + bool do_baseline_0D = stat_0 < NR_STATIONS && stat_D < NR_STATIONS && stat_0 <= stat_D; + bool do_baseline_1A = stat_1 < NR_STATIONS && stat_A < NR_STATIONS && stat_1 <= stat_A; + bool do_baseline_1B = stat_1 < NR_STATIONS && stat_B < NR_STATIONS && stat_1 <= stat_B; + bool do_baseline_1C = stat_1 < NR_STATIONS && stat_C < NR_STATIONS && stat_1 <= stat_C; + bool do_baseline_1D = stat_1 < NR_STATIONS && stat_D < NR_STATIONS && stat_1 <= stat_D; + bool do_baseline_2A = stat_2 < NR_STATIONS && stat_A < NR_STATIONS && stat_2 <= stat_A; + bool do_baseline_2B = stat_2 < NR_STATIONS && stat_B < NR_STATIONS && stat_2 <= stat_B; + bool do_baseline_2C = stat_2 < NR_STATIONS && stat_C < NR_STATIONS && stat_2 <= stat_C; + bool do_baseline_2D = stat_2 < NR_STATIONS && stat_D < NR_STATIONS && stat_2 <= stat_D; + bool do_baseline_3A = stat_3 < NR_STATIONS && stat_A < NR_STATIONS && stat_3 <= stat_A; + bool do_baseline_3B = stat_3 < NR_STATIONS && stat_B < NR_STATIONS && stat_3 <= stat_B; + bool do_baseline_3C = 
stat_3 < NR_STATIONS && stat_C < NR_STATIONS && stat_3 <= stat_C; + bool do_baseline_3D = stat_3 < NR_STATIONS && stat_D < NR_STATIONS && stat_3 <= stat_D; + + if (do_baseline_0A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0A_r.x, vis_0A_i.x, vis_0A_r.y, vis_0A_i.y, vis_0A_r.z, vis_0A_i.z, vis_0A_r.w, vis_0A_i.w }; + } + + if (do_baseline_0B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0B_r.x, vis_0B_i.x, vis_0B_r.y, vis_0B_i.y, vis_0B_r.z, vis_0B_i.z, vis_0B_r.w, vis_0B_i.w }; + } + + if (do_baseline_0C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0C_r.x, vis_0C_i.x, vis_0C_r.y, vis_0C_i.y, vis_0C_r.z, vis_0C_i.z, vis_0C_r.w, vis_0C_i.w }; + } + + if (do_baseline_0D) { + uint baseline = (stat_D * (stat_D + 1) / 2) + stat_0; + (*visibilities)[baseline][channel] = (float8) { vis_0D_r.x, vis_0D_i.x, vis_0D_r.y, vis_0D_i.y, vis_0D_r.z, vis_0D_i.z, vis_0D_r.w, vis_0D_i.w }; + } + + if (do_baseline_1A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1A_r.x, vis_1A_i.x, vis_1A_r.y, vis_1A_i.y, vis_1A_r.z, vis_1A_i.z, vis_1A_r.w, vis_1A_i.w }; + } + + if (do_baseline_1B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1B_r.x, vis_1B_i.x, vis_1B_r.y, vis_1B_i.y, vis_1B_r.z, vis_1B_i.z, vis_1B_r.w, vis_1B_i.w }; + } + + if (do_baseline_1C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1C_r.x, vis_1C_i.x, vis_1C_r.y, vis_1C_i.y, vis_1C_r.z, vis_1C_i.z, vis_1C_r.w, vis_1C_i.w }; + } + + if (do_baseline_1D) { + uint baseline = (stat_D * (stat_D + 1) / 2) + stat_1; + (*visibilities)[baseline][channel] = (float8) { vis_1D_r.x, vis_1D_i.x, vis_1D_r.y, vis_1D_i.y, vis_1D_r.z, vis_1D_i.z, 
vis_1D_r.w, vis_1D_i.w }; + } + + if (do_baseline_2A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2A_r.x, vis_2A_i.x, vis_2A_r.y, vis_2A_i.y, vis_2A_r.z, vis_2A_i.z, vis_2A_r.w, vis_2A_i.w }; + } + + if (do_baseline_2B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2B_r.x, vis_2B_i.x, vis_2B_r.y, vis_2B_i.y, vis_2B_r.z, vis_2B_i.z, vis_2B_r.w, vis_2B_i.w }; + } + + if (do_baseline_2C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2C_r.x, vis_2C_i.x, vis_2C_r.y, vis_2C_i.y, vis_2C_r.z, vis_2C_i.z, vis_2C_r.w, vis_2C_i.w }; + } + + if (do_baseline_2D) { + uint baseline = (stat_D * (stat_D + 1) / 2) + stat_2; + (*visibilities)[baseline][channel] = (float8) { vis_2D_r.x, vis_2D_i.x, vis_2D_r.y, vis_2D_i.y, vis_2D_r.z, vis_2D_i.z, vis_2D_r.w, vis_2D_i.w }; + } + + if (do_baseline_3A) { + uint baseline = (stat_A * (stat_A + 1) / 2) + stat_3; + (*visibilities)[baseline][channel] = (float8) { vis_3A_r.x, vis_3A_i.x, vis_3A_r.y, vis_3A_i.y, vis_3A_r.z, vis_3A_i.z, vis_3A_r.w, vis_3A_i.w }; + } + + if (do_baseline_3B) { + uint baseline = (stat_B * (stat_B + 1) / 2) + stat_3; + (*visibilities)[baseline][channel] = (float8) { vis_3B_r.x, vis_3B_i.x, vis_3B_r.y, vis_3B_i.y, vis_3B_r.z, vis_3B_i.z, vis_3B_r.w, vis_3B_i.w }; + } + + if (do_baseline_3C) { + uint baseline = (stat_C * (stat_C + 1) / 2) + stat_3; + (*visibilities)[baseline][channel] = (float8) { vis_3C_r.x, vis_3C_i.x, vis_3C_r.y, vis_3C_i.y, vis_3C_r.z, vis_3C_i.z, vis_3C_r.w, vis_3C_i.w }; + } + + if (do_baseline_3D) { + uint baseline = (stat_D * (stat_D + 1) / 2) + stat_3; + (*visibilities)[baseline][channel] = (float8) { vis_3D_r.x, vis_3D_i.x, vis_3D_r.y, vis_3D_i.y, vis_3D_r.z, vis_3D_i.z, vis_3D_r.w, vis_3D_i.w }; + } +} diff --git a/RTCP/GPUProc/src/Correlator.cl-0.ptx b/RTCP/GPUProc/src/Correlator.cl-0.ptx new 
file mode 100644
index 0000000000000000000000000000000000000000..37d40c5127c10f59970bebd14f4800ba1f03e8a1
Binary files /dev/null and b/RTCP/GPUProc/src/Correlator.cl-0.ptx differ
diff --git a/RTCP/GPUProc/src/DelayAndBandPass.cl b/RTCP/GPUProc/src/DelayAndBandPass.cl
new file mode 100644
index 0000000000000000000000000000000000000000..c0935095ed28aec2e680cd9496c68cd77fab0221
--- /dev/null
+++ b/RTCP/GPUProc/src/DelayAndBandPass.cl
@@ -0,0 +1,140 @@
+#include "math.cl"
+
+#if NR_CHANNELS == 1
+#undef BANDPASS_CORRECTION
+#endif
+
+
+typedef __global float4 (*OutputDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL];
+#if NR_CHANNELS == 1
+#if NR_BITS_PER_SAMPLE == 16
+typedef __global short4 (*InputDataType)[NR_STATIONS][NR_SAMPLES_PER_SUBBAND];
+#elif NR_BITS_PER_SAMPLE == 8
+typedef __global char4 (*InputDataType)[NR_STATIONS][NR_SAMPLES_PER_SUBBAND];
+#else
+#error unsupport NR_BITS_PER_SAMPLE
+#endif
+#else
+typedef __global float2 (*InputDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS];
+#endif
+typedef __global const float2 (*DelaysType)[NR_BEAMS][NR_STATIONS]; // 2 Polarizations; in seconds
+typedef __global const float2 (*PhaseOffsetsType)[NR_STATIONS]; // 2 Polarizations; in radians
+typedef __global const float (*BandPassFactorsType)[NR_CHANNELS];
+
+
+/*
+ * Applies delay compensation (a per-sample phase rotation) and/or band-pass
+ * weighting to the filtered samples and stores the X and Y polarization
+ * results as one float4 per sample in correctedDataPtr. Each work-item handles
+ * the station given by get_global_id(2).
+ *
+ * Both corrections are selected at compile time (DELAY_COMPENSATION,
+ * BANDPASS_CORRECTION); BANDPASS_CORRECTION is undefined above when
+ * NR_CHANNELS == 1.
+ *
+ * correctedDataPtr    output buffer, accessed as OutputDataType
+ * filteredDataPtr     input buffer, accessed as InputDataType
+ * subbandFrequency    subband frequency; per-channel frequencies are derived
+ *                     from it when NR_CHANNELS > 1
+ * beam                beam index into the delay tables
+ * delaysAtBeginPtr    delays that apply to the first sample (DelaysType)
+ * delaysAfterEndPtr   delays that apply one past the last sample (DelaysType);
+ *                     the phase is interpolated linearly between the two
+ * phaseOffsetsPtr     per-station phase offsets (PhaseOffsetsType)
+ * bandPassFactorsPtr  per-channel weights (BandPassFactorsType), only used
+ *                     when NR_CHANNELS > 1
+ */
+__kernel void applyDelaysAndCorrectBandPass(__global void *correctedDataPtr,
+                                            __global const void *filteredDataPtr,
+                                            float subbandFrequency,
+                                            unsigned beam,
+                                            __global const void *delaysAtBeginPtr,
+                                            __global const void *delaysAfterEndPtr,
+                                            __global const void *phaseOffsetsPtr,
+                                            __global const void *bandPassFactorsPtr)
+{
+  OutputDataType outputData = (OutputDataType) correctedDataPtr;
+  InputDataType inputData = (InputDataType) filteredDataPtr;
+  DelaysType delaysAtBegin = (DelaysType) delaysAtBeginPtr;
+  DelaysType delaysAfterEnd = (DelaysType) delaysAfterEndPtr;
+  PhaseOffsetsType phaseOffsets = (PhaseOffsetsType) phaseOffsetsPtr;
+
+#if NR_CHANNELS > 1
+  BandPassFactorsType bandPassFactors = (BandPassFactorsType) bandPassFactorsPtr;
+
+  __local float4 tmp[16][17]; // one too wide to allow coalesced reads
+
+  uint major = get_global_id(0) / 16;
+  uint minor = get_global_id(0) % 16;
+  uint channel = get_global_id(1) * 16;
+#endif
+  uint station = get_global_id(2);
+
+#if defined DELAY_COMPENSATION
+#if NR_CHANNELS == 1
+  float frequency = subbandFrequency;
+#else
+  float frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS);
+#endif
+  float2 delayAtBegin = (*delaysAtBegin)[beam][station];
+  float2 delayAfterEnd = (*delaysAfterEnd)[beam][station];
+  float2 phiBegin = -2 * 3.1415926535f * delayAtBegin;
+  float2 phiEnd = -2 * 3.1415926535f * delayAfterEnd;
+  float2 deltaPhi = (phiEnd - phiBegin) / NR_SAMPLES_PER_CHANNEL;
+#if NR_CHANNELS == 1
+  float2 myPhiBegin = (phiBegin + get_local_id(0) * deltaPhi) * frequency + (*phaseOffsets)[station];
+  float2 myPhiDelta = get_local_size(0) * deltaPhi * frequency;
+#else
+  float2 myPhiBegin = (phiBegin + major * deltaPhi) * frequency + (*phaseOffsets)[station];
+  float2 myPhiDelta = 16 * deltaPhi * frequency;
+#endif
+  float2 vX = (float2) { native_cos(myPhiBegin.x), native_sin(myPhiBegin.x) };
+  float2 vY = (float2) { native_cos(myPhiBegin.y), native_sin(myPhiBegin.y) };
+  float2 dvX = (float2) { native_cos(myPhiDelta.x), native_sin(myPhiDelta.x) };
+  float2 dvY = (float2) { native_cos(myPhiDelta.y), native_sin(myPhiDelta.y) };
+#endif
+
+#if defined BANDPASS_CORRECTION
+  float weight = (*bandPassFactors)[channel + minor];
+#endif
+
+#if defined DELAY_COMPENSATION && defined BANDPASS_CORRECTION
+  vX *= weight;
+  vY *= weight;
+#endif
+
+#if NR_CHANNELS == 1
+  for (uint time = get_local_id(0); time < NR_SAMPLES_PER_SUBBAND; time += get_local_size(0)) {
+    float4 samples = convert_float4((*inputData)[station][time]);
+    float2 sampleX = samples.xy;
+    float2 sampleY = samples.zw;
+#else
+  for (uint time = 0; time < NR_SAMPLES_PER_CHANNEL; time += 16) {
+    float2 sampleX = (*inputData)[station][0][time + major][channel + minor];
+    float2 sampleY = (*inputData)[station][1][time + major][channel + minor];
+#endif
+
+#if defined DELAY_COMPENSATION
+    sampleX = cmul(sampleX, vX);
+    sampleY = cmul(sampleY, vY);
+    // Advance each polarization's rotator by its own step. X must be updated
+    // from vX; using vY here would rotate X with Y's phase after the first pass.
+    vX = cmul(vX, dvX);
+    vY = cmul(vY, dvY);
+#elif defined BANDPASS_CORRECTION
+    sampleX *= weight;
+    sampleY *= weight;
+#endif
+
+#if NR_CHANNELS == 1
+    (*outputData)[station][0][time] = (float4) (sampleX, sampleY);
+#else
+    // Transpose via local memory: written as [major][minor], read as [minor][major].
+    tmp[major][minor] = (float4) (sampleX, sampleY);
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    (*outputData)[station][channel + major][time + minor] = tmp[minor][major];
+    barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+  }
+}
diff --git a/RTCP/GPUProc/src/DelayAndBandPass.cl-0.ptx b/RTCP/GPUProc/src/DelayAndBandPass.cl-0.ptx
new file mode 100644
index 0000000000000000000000000000000000000000..765b24b939aaab3b67274c4aedf221c92af81818
Binary files /dev/null and b/RTCP/GPUProc/src/DelayAndBandPass.cl-0.ptx differ
diff --git a/RTCP/GPUProc/src/Delays.cc b/RTCP/GPUProc/src/Delays.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dc6d6bacdd1478a65de6e797859033ecb90532bc
--- /dev/null
+++ b/RTCP/GPUProc/src/Delays.cc
@@ -0,0 +1,290 @@
+//# Delays.cc: Workholder for the delay compensation.
+//#
+//# Copyright (C) 2006
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: Delays.cc 17975 2011-05-10 09:52:51Z mol $
+
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#include <Delays.h>
+#include <Common/LofarLogger.h>
+#include <Common/PrettyUnits.h>
+#include <Interface/Exceptions.h>
+#include <Interface/BeamCoordinates.h>
+#include <Common/Thread/Mutex.h>
+#include <Common/Thread/Cancellation.h>
+
+#include <measures/Measures/MEpoch.h>
+#include <measures/Measures/MCDirection.h>
+#include <casa/Exceptions/Error.h>
+
+#include <pthread.h>
+#include <memory>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+using namespace casa;
+
+static LOFAR::Mutex casacoreMutex; // casacore is not thread safe
+
+//##---------------- Public methods ----------------##//
+
+Delays::Delays(const Parset &parset, const string &stationName, const TimeStamp &startTime)
+:
+  itsParset(parset),
+  stop(false),
+  // we need an extra entry for the central beam
+  itsBuffer(bufferSize, parset.nrBeams(), parset.nrTABs() + 1),
+  head(0),
+  tail(0),
+  bufferFree(bufferSize), // all bufferSize slots start out free ...
+  bufferUsed(0), // ... and none hold a result yet
+  itsNrCalcDelays(parset.nrCalcDelays()),
+  itsNrBeams(parset.nrBeams()),
+  itsNrTABs(parset.nrTABs()),
+  itsDirectionType(MDirection::J2000),
+  itsStartTime(startTime),
+  itsNrSamplesPerSec(parset.nrSamplesPerSubband()), // mainLoop advances the time by this many samples per result
+  itsSampleDuration(parset.sampleDuration()),
+  itsStationName(stationName),
+  itsDelayTimer("delay producer", true, true),
+  itsThread(this, &Delays::mainLoop, "[DelayCompensation] ") // NOTE(review): presumably runs mainLoop on its own thread -- confirm against Thread
+{
+}
+
+// Asks mainLoop to stop and raises bufferFree so that a pending bufferFree.down() can return.
+
+Delays::~Delays()
+{
+  ScopedDelayCancellation dc; // Semaphores provide cancellation points
+
+  // trigger mainLoop and force it to stop
+  stop = true;
+  bufferFree.up(itsNrCalcDelays);
+}
+
+
+// convert a time in samples to a (day,fraction) pair in UTC in a CasaCore format
+MVEpoch Delays::toUTC(int64 timeInSamples)
+{
+  double utc_sec = (timeInSamples * itsSampleDuration) / MVEpoch::secInDay; // despite the name this is a day count: the product is divided by secInDay
+  double day = floor(utc_sec);
+  double frac = utc_sec - day;
+
+  // 40587 is the Modified Julian Day number of 00:00:00 January 1, 1970, GMT
+  return MVEpoch(day + 40587., frac);
+}
+
+// Sets up beam directions, position differences and the casacore frame/converter; throws if nrCalcDelays does not divide bufferSize.
+
+void Delays::init()
+{
+  setBeamDirections(itsParset);
+  setPositionDiff(itsParset);
+
+  // We need bufferSize to be a multiple of the batch size (itsNrCalcDelays) to avoid
+  // wraparounds in the middle of the batch calculations. This makes life a lot easier
+  // and there is no need to support other cases.
+
+  if (bufferSize % itsNrCalcDelays > 0)
+    THROW(GPUProcException, "nrCalcDelays (" << itsNrCalcDelays << ") must divide bufferSize (" << bufferSize << ")");
+
+  ScopedLock lock(casacoreMutex);
+  ScopedDelayCancellation dc;
+
+  // Set an initial epoch for itsFrame.
+  itsFrame.set(MEpoch(toUTC(itsStartTime), MEpoch::UTC));
+
+  // Set the position for itsFrame.
+  itsFrame.set(itsPhaseCentre);
+
+  // Set-up the conversion engine, using reference direction ITRF.
+  itsConverter = new MDirection::Convert(itsDirectionType, MDirection::Ref(MDirection::ITRF, itsFrame));
+}
+
+// Producer: until stop is set, converts all beam directions for itsNrCalcDelays time steps per pass into itsBuffer.
+
+void Delays::mainLoop()
+{
+  LOG_DEBUG("Delay compensation thread running");
+
+  init();
+
+  // the current time, in samples
+  int64 currentTime = itsStartTime;
+
+  try {
+    while (!stop) {
+      bufferFree.down(itsNrCalcDelays);
+
+      itsDelayTimer.start();
+
+      // Calculate itsNrCalcDelays seconds worth of delays. Technically, we do not have
+      // to calculate that many at the end of the run, but there is no need to
+      // prevent the few excess delays from being calculated.
+
+      {
+        ScopedLock lock(casacoreMutex);
+        ScopedDelayCancellation dc;
+
+        // For each given moment in time ...
+        for (uint i = 0; i < itsNrCalcDelays; i ++) {
+          // Set the instant in time in itsFrame (toUTC adds 40587, the Modified Julian Day number of 00:00:00 January 1, 1970, GMT)
+          itsFrame.resetEpoch(toUTC(currentTime));
+
+          // Check whether we will store results in a valid place
+          ASSERTSTR(tail < bufferSize, tail << " < " << bufferSize);
+
+          // For each given direction in the sky ...
+          for (uint b = 0; b < itsNrBeams; b ++) {
+            for (uint p = 0; p < itsNrTABs + 1; p ++) {
+
+              // Define the astronomical direction as a J2000 direction.
+              MVDirection &sky = itsBeamDirections[b][p];
+
+              // Convert this direction, using the conversion engine.
+              MDirection dir = (*itsConverter)(sky);
+
+              // Store the converted direction in the current result slot
+              itsBuffer[tail][b][p] = dir.getValue();
+            }
+          }
+
+          // Advance time for the next calculation
+          currentTime += itsNrSamplesPerSec;
+
+          // Advance to the next result set.
+          // since bufferSize % itsNrCalcDelays == 0, wrap
+          // around can only occur between runs
+          ++ tail;
+        }
+      }
+      // check for wrap around for the next run
+      if (tail >= bufferSize)
+        tail = 0;
+
+      itsDelayTimer.stop();
+
+      bufferUsed.up(itsNrCalcDelays);
+    }
+  } catch (AipsError &ex) {
+    THROW(GPUProcException, "AipsError: " << ex.what());
+  }
+
+  LOG_DEBUG("Delay compensation thread stopped");
+}
+
+// Consumer: waits on bufferUsed, then copies the directions at itsBuffer[head] and the delays derived from them.
+
+void Delays::getNextDelays(Matrix<MVDirection> &directions, Matrix<double> &delays)
+{
+  ASSERTSTR(directions.num_elements() == itsNrBeams * (itsNrTABs + 1),
+            directions.num_elements() << " == " << itsNrBeams << "*" << (itsNrTABs + 1));
+
+  ASSERTSTR(delays.num_elements() == itsNrBeams * (itsNrTABs + 1),
+            delays.num_elements() << " == " << itsNrBeams << "*" << (itsNrTABs + 1));
+
+  bufferUsed.down();
+
+  // copy the directions at itsBuffer[head] into the provided buffer,
+  // and calculate the respective delays
+  for (unsigned b = 0; b < itsNrBeams; b ++) {
+    for (unsigned p = 0; p < itsNrTABs + 1; p ++) {
+      const MVDirection &dir = itsBuffer[head][b][p];
+
+      directions[b][p] = dir;
+      delays[b][p] = dir * itsPhasePositionDiff * (1.0 / speedOfLight);
+    }
+  }
+
+  // increment the head pointer
+  if (++ head == bufferSize)
+    head = 0;
+
+  bufferFree.up(); // hand the consumed slot back to the producer
+}
+
+
+void Delays::setBeamDirections(const Parset &parset)
+{
+  const BeamCoordinates& pencilBeams = parset.pencilBeams();
+
+  // TODO: For now, we include pencil beams for all regular beams,
+  // and use the pencil beam offsets as offsets in J2000.
+  // To do the coordinates properly, the offsets should be applied
+  // in today's coordinates (JMEAN/JTRUE?), not J2000.
+
+  itsBeamDirections.resize(itsNrBeams, itsNrTABs + 1);
+
+  // We only support beams of the same direction type for now
+  const string type0 = toUpper(parset.getBeamDirectionType(0));
+
+  for (unsigned beam = 1; beam < itsNrBeams; beam ++) {
+    const string typeN = toUpper(parset.getBeamDirectionType(beam));
+
+    if (type0 != typeN)
+      THROW(GPUProcException, "All beams must use the same coordinate system (beam 0 uses " << type0 << " but beam " << beam << " uses " << typeN << ")");
+  }
+
+  if (!MDirection::getType(itsDirectionType, type0))
+    THROW(GPUProcException, "Beam direction type unknown: " << type0);
+
+  // Get the source directions from the parameter set.
+  // Split the \a dir vector into separate Direction objects.
+ for (unsigned beam = 0; beam < itsNrBeams; beam ++) { + const vector<double> beamDir = parset.getBeamDirection(beam); + + // add central beam coordinates for non-beamforming pipelines + itsBeamDirections[beam][0] = MVDirection(beamDir[0], beamDir[1]); + + for (unsigned pencil = 0; pencil < itsNrTABs; pencil ++) { + // obtain pencil coordinate + const BeamCoord3D &pencilCoord = pencilBeams[pencil]; + + // apply angle modification + const double angle1 = beamDir[0] + pencilCoord[0]; + const double angle2 = beamDir[1] + pencilCoord[1]; + + // store beam + itsBeamDirections[beam][pencil + 1] = MVDirection(angle1, angle2); + } + } +} + + +void Delays::setPositionDiff(const Parset &parset) +{ + // Calculate the station to reference station position difference of apply station. + + // Station positions must be given in ITRF + string str = toUpper(parset.positionType()); + + if (str != "ITRF") + THROW(GPUProcException, "OLAP.DelayComp.positionType must be ITRF"); + + // Get the antenna positions from the parameter set. The antenna + // positions are stored as one large vector of doubles. + const MVPosition pRef(parset.getRefPhaseCentre()); + const MVPosition phaseCentre(parset.getPhaseCentreOf(itsStationName)); + + itsPhaseCentre = MPosition(phaseCentre, MPosition::ITRF); + itsPhasePositionDiff = phaseCentre - pRef; +} + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/Delays.h b/RTCP/GPUProc/src/Delays.h new file mode 100644 index 0000000000000000000000000000000000000000..9de54cfa5462c0d214034c143e2644b97d7a4262 --- /dev/null +++ b/RTCP/GPUProc/src/Delays.h @@ -0,0 +1,163 @@ +//# Delays.h: Calculate delay compensation for all stations. 
+//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: Delays.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_DELAYS_H +#define LOFAR_GPUPROC_DELAYS_H + +// \file +// Calculate delay compensation for all stations. + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +//# Includes +#include "Common/Timer.h" +#include "Interface/MultiDimArray.h" +#include "Interface/Parset.h" +#include "Interface/RSPTimeStamp.h" +#include "Interface/SmartPtr.h" +#include "Common/Thread/Semaphore.h" +#include "Common/Thread/Thread.h" + +#include <measures/Measures/MeasConvert.h> +#include <measures/Measures/MDirection.h> +#include <measures/Measures/MPosition.h> +#include <casa/Quanta/MVDirection.h> +#include <casa/Quanta/MVPosition.h> +#include <casa/Quanta/MVEpoch.h> + +namespace LOFAR { +namespace RTCP { + + // Speed of light in vacuum, in m/s. +const double speedOfLight = 299792458; + +// Workholder for calculating the delay compensation that must be applied +// per beam per station. 
We start by calculating the path length +// difference for beam \f$\mathbf{b}_i\f$ between station \f$j\f$ at +// position \f$\mathbf{p}_j\f$ and the reference station 0 at position +// \f$\mathbf{p}_0\f$. +// \f[ +// d_{ij} - d_{i0} = \mathbf{b}_i \cdot \mathbf{p}_j +// - \mathbf{b}_i \cdot \mathbf{p}_0 +// = \mathbf{b}_i \cdot (\mathbf{p}_j - \mathbf{p}_0) +// \f] +// The choice of reference station is arbitrary, so we simply use the +// reference phase centre given in the parameter set. From the equation above it is +// clear that we can reduce the number of dot products if we precalculate +// the position difference vector \f$\mathbf{p}_j - \mathbf{p}_0\f$, +// which we store in \c itsPhasePositionDiff. +// +// The geometrical delay is easily obtained by dividing the path length +// difference by the speed of light in vacuum. We don't need to know the +// speed of light in the atmosphere, because the AZEL directions that +// we've calculated are valid for vacuum (only!). This is the delay that +// must be compensated for. +// +// The calculated delay compensation must be split into a coarse (whole +// sample) delay and a fine (subsample) delay. The coarse delay will be +// applied in the input section as a true time delay, by shifting the +// input samples. The fine delay will be applied in the correlator as a +// phase shift in each frequency channel. 
+class Delays +{ + public: + Delays(const Parset &ps, const string &stationName, const TimeStamp &startTime); + ~Delays(); + + // Get the set of directions (ITRF) and delays for the beams, for the next CN integration time. + // Both matrices must have dimensions [itsNrBeams][itsNrTABs+1]; only the element count is asserted. + void getNextDelays(Matrix<casa::MVDirection> &directions, Matrix<double> &delays); + + private: + casa::MVEpoch toUTC(int64 timeInSamples); // sample count -> casacore (day, fraction) epoch in UTC + + void init(); // one-time set-up, called at the start of mainLoop() + + // do the delay compensation calculations in a separate thread to allow bulk + // calculations and to avoid blocking other threads + void mainLoop(); + + const Parset &itsParset; // NOTE(review): kept by reference -- the Parset has to outlive this object + + volatile bool stop; // set by the destructor, polled by mainLoop(); NOTE(review): volatile is not a synchronisation primitive -- confirm this is adequate + + // the circular buffer to hold the moving beam directions for every second of data + Cube<casa::MVDirection> itsBuffer; // [bufferSize][itsNrBeams][itsNrTABs+1] + size_t head, tail; // head: next entry read by getNextDelays(); tail: next entry written by mainLoop() + + // two semaphores are used: one to trigger the producer that free space is available, + // another to trigger the consumer that data is available. + Semaphore bufferFree, bufferUsed; + + // the number of seconds to maintain in the buffer + static const size_t bufferSize = 128; + + // the number of delays to calculate in a single run; init() requires it to divide bufferSize + const unsigned itsNrCalcDelays; + + // Get the source directions from the parameter file and initialize \c + // itsBeamDirections. Beam positions must be specified as + // <tt>(longitude, latitude, direction-type)</tt>. The direction angles + // are in radians; the direction type must be one of J2000, ITRF, or + // AZEL, and all beams must use the same direction type. + void setBeamDirections(const Parset &); + + // Set the station to reference station position difference for + // this station (itsStationName). CS002LBA is said to be the reference station, even if it + // does not take part in the observation (NOTE(review): the code reads parset.getRefPhaseCentre() -- confirm). The position + // difference is stored in \c itsPhasePositionDiff. In other + // words: we store \f$\mathbf{p}_j - \mathbf{p}_0\f$, where + // \f$\mathbf{p}_0\f$ is the position of the reference station + // and \f$\mathbf{p}_j\f$ is the position of station \f$j\f$. 
+ void setPositionDiff(const Parset &); + + // Beam info. + const unsigned itsNrBeams; + const unsigned itsNrTABs; + casa::MDirection::Types itsDirectionType; + Matrix<casa::MVDirection> itsBeamDirections; // [itsNrBeams][itsNrTABs+1]; entry 0 of the second dimension is the central beam + + // Sample timings. + const TimeStamp itsStartTime; + const unsigned itsNrSamplesPerSec; // NOTE(review): initialised from parset.nrSamplesPerSubband() -- confirm that this is one second of samples + const double itsSampleDuration; + + // Station Name. + const string itsStationName; + casa::MeasFrame itsFrame; // measurement frame (epoch and station position) referenced by itsConverter + SmartPtr<casa::MDirection::Convert> itsConverter; // converts itsDirectionType directions to ITRF; created in init() + + // Station phase centre. + casa::MPosition itsPhaseCentre; + + // Station to reference station position difference vector. + casa::MVPosition itsPhasePositionDiff; + + NSTimer itsDelayTimer; + + Thread itsThread; // runs mainLoop(); keep this the last member so that all members above are initialised before the thread uses them +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/FFT.cl b/RTCP/GPUProc/src/FFT.cl new file mode 100644 index 0000000000000000000000000000000000000000..d235a8c89095853d855579e1753869bd56bd2244 --- /dev/null +++ b/RTCP/GPUProc/src/FFT.cl @@ -0,0 +1,282 @@ +// Run kernel fft0 with global dim = {64*BatchSize}, local dim={64} +#ifndef M_PI +//#define M_PI 0x1.921fb54442d18p+1 +#define M_PI 3.1415926536f +#endif +#define complexMul(a,b) ((float2)(mad(-(a).y, (b).y, (a).x * (b).x), mad((a).y, (b).x, (a).x * (b).y))) +#define conj(a) ((float2)((a).x, -(a).y)) +#define conjTransp(a) ((float2)(-(a).y, (a).x)) + +#define fftKernel2(a,dir) \ +{ \ + float2 c = (a)[0]; \ + (a)[0] = c + (a)[1]; \ + (a)[1] = c - (a)[1]; \ +} + +#define fftKernel2S(d1,d2,dir) \ +{ \ + float2 c = (d1); \ + (d1) = c + (d2); \ + (d2) = c - (d2); \ +} + +#define fftKernel4(a,dir) \ +{ \ + fftKernel2S((a)[0], (a)[2], dir); \ + fftKernel2S((a)[1], (a)[3], dir); \ + fftKernel2S((a)[0], (a)[1], dir); \ + (a)[3] = (float2)(dir)*(conjTransp((a)[3])); \ + fftKernel2S((a)[2], (a)[3], dir); \ + float2 c = (a)[1]; \ + (a)[1] = (a)[2]; \ + (a)[2] = c; \ +} + +#define fftKernel4s(a0,a1,a2,a3,dir) \ +{ \ + fftKernel2S((a0), (a2), dir); \ + fftKernel2S((a1), (a3), dir); \ + 
fftKernel2S((a0), (a1), dir); \ + (a3) = (float2)(dir)*(conjTransp((a3))); \ + fftKernel2S((a2), (a3), dir); \ + float2 c = (a1); \ + (a1) = (a2); \ + (a2) = c; \ +} + +#define bitreverse8(a) \ +{ \ + float2 c; \ + c = (a)[1]; \ + (a)[1] = (a)[4]; \ + (a)[4] = c; \ + c = (a)[3]; \ + (a)[3] = (a)[6]; \ + (a)[6] = c; \ +} + +#define fftKernel8(a,dir) \ +{ \ + const float2 w1 = (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \ + const float2 w3 = (float2)(-0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f); \ + float2 c; \ + fftKernel2S((a)[0], (a)[4], dir); \ + fftKernel2S((a)[1], (a)[5], dir); \ + fftKernel2S((a)[2], (a)[6], dir); \ + fftKernel2S((a)[3], (a)[7], dir); \ + (a)[5] = complexMul(w1, (a)[5]); \ + (a)[6] = (float2)(dir)*(conjTransp((a)[6])); \ + (a)[7] = complexMul(w3, (a)[7]); \ + fftKernel2S((a)[0], (a)[2], dir); \ + fftKernel2S((a)[1], (a)[3], dir); \ + fftKernel2S((a)[4], (a)[6], dir); \ + fftKernel2S((a)[5], (a)[7], dir); \ + (a)[3] = (float2)(dir)*(conjTransp((a)[3])); \ + (a)[7] = (float2)(dir)*(conjTransp((a)[7])); \ + fftKernel2S((a)[0], (a)[1], dir); \ + fftKernel2S((a)[2], (a)[3], dir); \ + fftKernel2S((a)[4], (a)[5], dir); \ + fftKernel2S((a)[6], (a)[7], dir); \ + bitreverse8((a)); \ +} + +#define bitreverse4x4(a) \ +{ \ + float2 c; \ + c = (a)[1]; (a)[1] = (a)[4]; (a)[4] = c; \ + c = (a)[2]; (a)[2] = (a)[8]; (a)[8] = c; \ + c = (a)[3]; (a)[3] = (a)[12]; (a)[12] = c; \ + c = (a)[6]; (a)[6] = (a)[9]; (a)[9] = c; \ + c = (a)[7]; (a)[7] = (a)[13]; (a)[13] = c; \ + c = (a)[11]; (a)[11] = (a)[14]; (a)[14] = c; \ +} + +#define fftKernel16(a,dir) \ +{ \ + const float w0 = 0x1.d906bcp-1f; \ + const float w1 = 0x1.87de2ap-2f; \ + const float w2 = 0x1.6a09e6p-1f; \ + fftKernel4s((a)[0], (a)[4], (a)[8], (a)[12], dir); \ + fftKernel4s((a)[1], (a)[5], (a)[9], (a)[13], dir); \ + fftKernel4s((a)[2], (a)[6], (a)[10], (a)[14], dir); \ + fftKernel4s((a)[3], (a)[7], (a)[11], (a)[15], dir); \ + (a)[5] = complexMul((a)[5], (float2)(w0, dir*w1)); \ + (a)[6] = complexMul((a)[6], 
(float2)(w2, dir*w2)); \ + (a)[7] = complexMul((a)[7], (float2)(w1, dir*w0)); \ + (a)[9] = complexMul((a)[9], (float2)(w2, dir*w2)); \ + (a)[10] = (float2)(dir)*(conjTransp((a)[10])); \ + (a)[11] = complexMul((a)[11], (float2)(-w2, dir*w2)); \ + (a)[13] = complexMul((a)[13], (float2)(w1, dir*w0)); \ + (a)[14] = complexMul((a)[14], (float2)(-w2, dir*w2)); \ + (a)[15] = complexMul((a)[15], (float2)(-w0, dir*-w1)); \ + fftKernel4((a), dir); \ + fftKernel4((a) + 4, dir); \ + fftKernel4((a) + 8, dir); \ + fftKernel4((a) + 12, dir); \ + bitreverse4x4((a)); \ +} + +#define bitreverse32(a) \ +{ \ + float2 c1, c2; \ + c1 = (a)[2]; (a)[2] = (a)[1]; c2 = (a)[4]; (a)[4] = c1; c1 = (a)[8]; (a)[8] = c2; c2 = (a)[16]; (a)[16] = c1; (a)[1] = c2; \ + c1 = (a)[6]; (a)[6] = (a)[3]; c2 = (a)[12]; (a)[12] = c1; c1 = (a)[24]; (a)[24] = c2; c2 = (a)[17]; (a)[17] = c1; (a)[3] = c2; \ + c1 = (a)[10]; (a)[10] = (a)[5]; c2 = (a)[20]; (a)[20] = c1; c1 = (a)[9]; (a)[9] = c2; c2 = (a)[18]; (a)[18] = c1; (a)[5] = c2; \ + c1 = (a)[14]; (a)[14] = (a)[7]; c2 = (a)[28]; (a)[28] = c1; c1 = (a)[25]; (a)[25] = c2; c2 = (a)[19]; (a)[19] = c1; (a)[7] = c2; \ + c1 = (a)[22]; (a)[22] = (a)[11]; c2 = (a)[13]; (a)[13] = c1; c1 = (a)[26]; (a)[26] = c2; c2 = (a)[21]; (a)[21] = c1; (a)[11] = c2; \ + c1 = (a)[30]; (a)[30] = (a)[15]; c2 = (a)[29]; (a)[29] = c1; c1 = (a)[27]; (a)[27] = c2; c2 = (a)[23]; (a)[23] = c1; (a)[15] = c2; \ +} + +#define fftKernel32(a,dir) \ +{ \ + fftKernel2S((a)[0], (a)[16], dir); \ + fftKernel2S((a)[1], (a)[17], dir); \ + fftKernel2S((a)[2], (a)[18], dir); \ + fftKernel2S((a)[3], (a)[19], dir); \ + fftKernel2S((a)[4], (a)[20], dir); \ + fftKernel2S((a)[5], (a)[21], dir); \ + fftKernel2S((a)[6], (a)[22], dir); \ + fftKernel2S((a)[7], (a)[23], dir); \ + fftKernel2S((a)[8], (a)[24], dir); \ + fftKernel2S((a)[9], (a)[25], dir); \ + fftKernel2S((a)[10], (a)[26], dir); \ + fftKernel2S((a)[11], (a)[27], dir); \ + fftKernel2S((a)[12], (a)[28], dir); \ + fftKernel2S((a)[13], (a)[29], dir); \ + 
fftKernel2S((a)[14], (a)[30], dir); \ + fftKernel2S((a)[15], (a)[31], dir); \ + (a)[17] = complexMul((a)[17], (float2)(0x1.f6297cp-1f, dir*0x1.8f8b84p-3f)); \ + (a)[18] = complexMul((a)[18], (float2)(0x1.d906bcp-1f, dir*0x1.87de2ap-2f)); \ + (a)[19] = complexMul((a)[19], (float2)(0x1.a9b662p-1f, dir*0x1.1c73b4p-1f)); \ + (a)[20] = complexMul((a)[20], (float2)(0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f)); \ + (a)[21] = complexMul((a)[21], (float2)(0x1.1c73b4p-1f, dir*0x1.a9b662p-1f)); \ + (a)[22] = complexMul((a)[22], (float2)(0x1.87de2ap-2f, dir*0x1.d906bcp-1f)); \ + (a)[23] = complexMul((a)[23], (float2)(0x1.8f8b84p-3f, dir*0x1.f6297cp-1f)); \ + (a)[24] = complexMul((a)[24], (float2)(0x0p+0f, dir*0x1p+0f)); \ + (a)[25] = complexMul((a)[25], (float2)(-0x1.8f8b84p-3f, dir*0x1.f6297cp-1f)); \ + (a)[26] = complexMul((a)[26], (float2)(-0x1.87de2ap-2f, dir*0x1.d906bcp-1f)); \ + (a)[27] = complexMul((a)[27], (float2)(-0x1.1c73b4p-1f, dir*0x1.a9b662p-1f)); \ + (a)[28] = complexMul((a)[28], (float2)(-0x1.6a09e6p-1f, dir*0x1.6a09e6p-1f)); \ + (a)[29] = complexMul((a)[29], (float2)(-0x1.a9b662p-1f, dir*0x1.1c73b4p-1f)); \ + (a)[30] = complexMul((a)[30], (float2)(-0x1.d906bcp-1f, dir*0x1.87de2ap-2f)); \ + (a)[31] = complexMul((a)[31], (float2)(-0x1.f6297cp-1f, dir*0x1.8f8b84p-3f)); \ + fftKernel16((a), dir); \ + fftKernel16((a) + 16, dir); \ + bitreverse32((a)); \ +} + +__kernel void \ +clFFT_1DTwistInterleaved(__global float2 *in, unsigned int startRow, unsigned int numCols, unsigned int N, unsigned int numRowsToProcess, int dir) \ +{ \ + float2 a, w; \ + float ang; \ + unsigned int j; \ + unsigned int i = get_global_id(0); \ + unsigned int startIndex = i; \ + \ + if(i < numCols) \ + { \ + for(j = 0; j < numRowsToProcess; j++) \ + { \ + a = in[startIndex]; \ + ang = 2.0f * M_PI * dir * i * (startRow + j) / N; \ + w = (float2)(native_cos(ang), native_sin(ang)); \ + a = complexMul(a, w); \ + in[startIndex] = a; \ + startIndex += numCols; \ + } \ + } \ +} \ +__kernel void fft0(__global 
float2 *in_out) +{ + const int dir = -1; + __local float2 sMem[4][272]; + int i, j; + float ang, angf; + __local float2 *lMemStore, *lMemLoad; + float2 a0, a1, a2, a3; + int offset = (get_group_id(0) * 4 + get_local_id(1)) * 256 + get_local_id(0); + in_out += offset; + a0 = in_out[0]; + a1 = in_out[64]; + a2 = in_out[128]; + a3 = in_out[192]; + fftKernel4s(a0, a1, a2, a3, dir); + angf = (float) get_local_id(0); + ang = dir * ( 2.0f * M_PI * 1.0f / 256.0f ) * angf; + float2 w0 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 2.0f / 256.0f ) * angf; + float2 w1 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 3.0f / 256.0f ) * angf; + float2 w2 = (float2)(native_cos(ang), native_sin(ang)); + a1 = complexMul(a1, w0); + a2 = complexMul(a2, w1); + a3 = complexMul(a3, w2); + lMemStore = &sMem[get_local_id(1)][get_local_id(0)]; + j = get_local_id(0) & 3; + i = get_local_id(0) >> 2; + lMemLoad = &sMem[get_local_id(1)][j * 68 + i]; + lMemStore[0] = a0; + lMemStore[68] = a1; + lMemStore[136] = a2; + lMemStore[204] = a3; + barrier(CLK_LOCAL_MEM_FENCE); + a0 = lMemLoad[0]; + a1 = lMemLoad[16]; + a2 = lMemLoad[32]; + a3 = lMemLoad[48]; + barrier(CLK_LOCAL_MEM_FENCE); + fftKernel4s(a0, a1, a2, a3, dir); + angf = (float) (get_local_id(0) >> 2); + ang = dir * ( 2.0f * M_PI * 1.0f / 64.0f ) * angf; + float2 w3 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 2.0f / 64.0f ) * angf; + float2 w4 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 3.0f / 64.0f ) * angf; + float2 w5 = (float2)(native_cos(ang), native_sin(ang)); + a1 = complexMul(a1, w3); + a2 = complexMul(a2, w4); + a3 = complexMul(a3, w5); + j = (get_local_id(0) & 15) >> 2; + i = (get_local_id(0) >> 4) * 4 + (get_local_id(0) & 3); + lMemLoad = &sMem[get_local_id(1)][j * 68 + i]; + lMemStore[0] = a0; + lMemStore[68] = a1; + lMemStore[136] = a2; + lMemStore[204] = a3; + barrier(CLK_LOCAL_MEM_FENCE); + a0 = 
lMemLoad[0]; + a1 = lMemLoad[16]; + a2 = lMemLoad[32]; + a3 = lMemLoad[48]; + barrier(CLK_LOCAL_MEM_FENCE); + fftKernel4s(a0, a1, a2, a3, dir); + angf = (float) (get_local_id(0) >> 4); + ang = dir * ( 2.0f * M_PI * 1.0f / 16.0f ) * angf; + float2 w6 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 2.0f / 16.0f ) * angf; + float2 w7 = (float2)(native_cos(ang), native_sin(ang)); + ang = dir * ( 2.0f * M_PI * 3.0f / 16.0f ) * angf; + float2 w8 = (float2)(native_cos(ang), native_sin(ang)); + a1 = complexMul(a1, w6); + a2 = complexMul(a2, w7); + a3 = complexMul(a3, w8); + j = get_local_id(0) >> 4; + i = get_local_id(0) & 15; + lMemLoad = &sMem[get_local_id(1)][j * 64 + i]; + lMemStore[0] = a0; + lMemStore[64] = a1; + lMemStore[128] = a2; + lMemStore[192] = a3; + barrier(CLK_LOCAL_MEM_FENCE); + a0 = lMemLoad[0]; + a1 = lMemLoad[16]; + a2 = lMemLoad[32]; + a3 = lMemLoad[48]; + fftKernel4s(a0, a1, a2, a3, dir); + in_out[0] = a0; + in_out[64] = a1; + in_out[128] = a2; + in_out[192] = a3; +} diff --git a/RTCP/GPUProc/src/FIR.cl b/RTCP/GPUProc/src/FIR.cl new file mode 100644 index 0000000000000000000000000000000000000000..c915cd00b315df672e7c461b38b4acbdeeb6ad10 --- /dev/null +++ b/RTCP/GPUProc/src/FIR.cl @@ -0,0 +1,365 @@ +#define COMPLEX 2 // do not change + +#if NR_BITS_PER_SAMPLE == 16 +typedef short SampleType; +#elif NR_BITS_PER_SAMPLE == 8 +typedef char SampleType; +#else +#error unsupport NR_BITS_PER_SAMPLE +#endif + +typedef __global SampleType (*SampledDataType)[NR_STATIONS][NR_TAPS - 1 + NR_SAMPLES_PER_CHANNEL][NR_CHANNELS][NR_POLARIZATIONS * COMPLEX]; +typedef __global float (*FilteredDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS][COMPLEX]; +typedef __global const float16 (*WeightsType)[NR_CHANNELS]; + + +__kernel void FIR_filter(__global void *filteredDataPtr, + __global const void *sampledDataPtr, + __global const void *weightsPtr) +{ + SampledDataType sampledData = (SampledDataType) sampledDataPtr; + 
FilteredDataType filteredData = (FilteredDataType) filteredDataPtr; + WeightsType weightsData = (WeightsType) weightsPtr; + + uint cpr = get_global_id(0); +#if 0 + uint pol_ri = cpr & 3; + uint channel = cpr >> 2; + uint ri = cpr & 1; + uint pol = pol_ri >> 1; +#else + uint ri = cpr & 1; + uint channel = (cpr >> 1) % NR_CHANNELS; + uint pol = (cpr >> 1) / NR_CHANNELS; + uint pol_ri = (pol << 1) | ri; +#endif + uint station = get_global_id(1); + +//#pragma OPENCL EXTENSION cl_amd_printf : enable + + const float16 weights = (*weightsData)[channel]; + float16 delayLine; + float16 sum; + + delayLine.s0 = convert_float((*sampledData)[station][0][channel][pol_ri]); + delayLine.s1 = convert_float((*sampledData)[station][1][channel][pol_ri]); + delayLine.s2 = convert_float((*sampledData)[station][2][channel][pol_ri]); + delayLine.s3 = convert_float((*sampledData)[station][3][channel][pol_ri]); + delayLine.s4 = convert_float((*sampledData)[station][4][channel][pol_ri]); + delayLine.s5 = convert_float((*sampledData)[station][5][channel][pol_ri]); + delayLine.s6 = convert_float((*sampledData)[station][6][channel][pol_ri]); + delayLine.s7 = convert_float((*sampledData)[station][7][channel][pol_ri]); + delayLine.s8 = convert_float((*sampledData)[station][8][channel][pol_ri]); + delayLine.s9 = convert_float((*sampledData)[station][9][channel][pol_ri]); + delayLine.sA = convert_float((*sampledData)[station][10][channel][pol_ri]); + delayLine.sB = convert_float((*sampledData)[station][11][channel][pol_ri]); + delayLine.sC = convert_float((*sampledData)[station][12][channel][pol_ri]); + delayLine.sD = convert_float((*sampledData)[station][13][channel][pol_ri]); + delayLine.sE = convert_float((*sampledData)[station][14][channel][pol_ri]); + + for (uint time = 0; time < NR_SAMPLES_PER_CHANNEL; time += NR_TAPS) { + delayLine.sF = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 0][channel][pol_ri]); + sum.s0 = weights.sF * delayLine.s0; + delayLine.s0 = 
convert_float((*sampledData)[station][time + NR_TAPS - 1 + 1][channel][pol_ri]); + sum.s0 += weights.sE * delayLine.s1; + sum.s0 += weights.sD * delayLine.s2; + sum.s0 += weights.sC * delayLine.s3; + sum.s0 += weights.sB * delayLine.s4; + sum.s0 += weights.sA * delayLine.s5; + sum.s0 += weights.s9 * delayLine.s6; + sum.s0 += weights.s8 * delayLine.s7; + sum.s0 += weights.s7 * delayLine.s8; + sum.s0 += weights.s6 * delayLine.s9; + sum.s0 += weights.s5 * delayLine.sA; + sum.s0 += weights.s4 * delayLine.sB; + sum.s0 += weights.s3 * delayLine.sC; + sum.s0 += weights.s2 * delayLine.sD; + sum.s0 += weights.s1 * delayLine.sE; + sum.s0 += weights.s0 * delayLine.sF; + (*filteredData)[station][pol][time + 0][channel][ri] = sum.s0; + + sum.s1 = weights.sF * delayLine.s1; + delayLine.s1 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 2][channel][pol_ri]); + sum.s1 += weights.sE * delayLine.s2; + sum.s1 += weights.sD * delayLine.s3; + sum.s1 += weights.sC * delayLine.s4; + sum.s1 += weights.sB * delayLine.s5; + sum.s1 += weights.sA * delayLine.s6; + sum.s1 += weights.s9 * delayLine.s7; + sum.s1 += weights.s8 * delayLine.s8; + sum.s1 += weights.s7 * delayLine.s9; + sum.s1 += weights.s6 * delayLine.sA; + sum.s1 += weights.s5 * delayLine.sB; + sum.s1 += weights.s4 * delayLine.sC; + sum.s1 += weights.s3 * delayLine.sD; + sum.s1 += weights.s2 * delayLine.sE; + sum.s1 += weights.s1 * delayLine.sF; + sum.s1 += weights.s0 * delayLine.s0; + (*filteredData)[station][pol][time + 1][channel][ri] = sum.s1; + + sum.s2 = weights.sF * delayLine.s2; + delayLine.s2 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 3][channel][pol_ri]); + sum.s2 += weights.sE * delayLine.s3; + sum.s2 += weights.sD * delayLine.s4; + sum.s2 += weights.sC * delayLine.s5; + sum.s2 += weights.sB * delayLine.s6; + sum.s2 += weights.sA * delayLine.s7; + sum.s2 += weights.s9 * delayLine.s8; + sum.s2 += weights.s8 * delayLine.s9; + sum.s2 += weights.s7 * delayLine.sA; + sum.s2 += weights.s6 * 
delayLine.sB; + sum.s2 += weights.s5 * delayLine.sC; + sum.s2 += weights.s4 * delayLine.sD; + sum.s2 += weights.s3 * delayLine.sE; + sum.s2 += weights.s2 * delayLine.sF; + sum.s2 += weights.s1 * delayLine.s0; + sum.s2 += weights.s0 * delayLine.s1; + (*filteredData)[station][pol][time + 2][channel][ri] = sum.s2; + + sum.s3 = weights.sF * delayLine.s3; + delayLine.s3 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 4][channel][pol_ri]); + sum.s3 += weights.sE * delayLine.s4; + sum.s3 += weights.sD * delayLine.s5; + sum.s3 += weights.sC * delayLine.s6; + sum.s3 += weights.sB * delayLine.s7; + sum.s3 += weights.sA * delayLine.s8; + sum.s3 += weights.s9 * delayLine.s9; + sum.s3 += weights.s8 * delayLine.sA; + sum.s3 += weights.s7 * delayLine.sB; + sum.s3 += weights.s6 * delayLine.sC; + sum.s3 += weights.s5 * delayLine.sD; + sum.s3 += weights.s4 * delayLine.sE; + sum.s3 += weights.s3 * delayLine.sF; + sum.s3 += weights.s2 * delayLine.s0; + sum.s3 += weights.s1 * delayLine.s1; + sum.s3 += weights.s0 * delayLine.s2; + (*filteredData)[station][pol][time + 3][channel][ri] = sum.s3; + + sum.s4 = weights.sF * delayLine.s4; + delayLine.s4 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 5][channel][pol_ri]); + sum.s4 += weights.sE * delayLine.s5; + sum.s4 += weights.sD * delayLine.s6; + sum.s4 += weights.sC * delayLine.s7; + sum.s4 += weights.sB * delayLine.s8; + sum.s4 += weights.sA * delayLine.s9; + sum.s4 += weights.s9 * delayLine.sA; + sum.s4 += weights.s8 * delayLine.sB; + sum.s4 += weights.s7 * delayLine.sC; + sum.s4 += weights.s6 * delayLine.sD; + sum.s4 += weights.s5 * delayLine.sE; + sum.s4 += weights.s4 * delayLine.sF; + sum.s4 += weights.s3 * delayLine.s0; + sum.s4 += weights.s2 * delayLine.s1; + sum.s4 += weights.s1 * delayLine.s2; + sum.s4 += weights.s0 * delayLine.s3; + (*filteredData)[station][pol][time + 4][channel][ri] = sum.s4; + + sum.s5 = weights.sF * delayLine.s5; + delayLine.s5 = convert_float((*sampledData)[station][time + NR_TAPS 
- 1 + 6][channel][pol_ri]); + sum.s5 += weights.sE * delayLine.s6; + sum.s5 += weights.sD * delayLine.s7; + sum.s5 += weights.sC * delayLine.s8; + sum.s5 += weights.sB * delayLine.s9; + sum.s5 += weights.sA * delayLine.sA; + sum.s5 += weights.s9 * delayLine.sB; + sum.s5 += weights.s8 * delayLine.sC; + sum.s5 += weights.s7 * delayLine.sD; + sum.s5 += weights.s6 * delayLine.sE; + sum.s5 += weights.s5 * delayLine.sF; + sum.s5 += weights.s4 * delayLine.s0; + sum.s5 += weights.s3 * delayLine.s1; + sum.s5 += weights.s2 * delayLine.s2; + sum.s5 += weights.s1 * delayLine.s3; + sum.s5 += weights.s0 * delayLine.s4; + (*filteredData)[station][pol][time + 5][channel][ri] = sum.s5; + + sum.s6 = weights.sF * delayLine.s6; + delayLine.s6 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 7][channel][pol_ri]); + sum.s6 += weights.sE * delayLine.s7; + sum.s6 += weights.sD * delayLine.s8; + sum.s6 += weights.sC * delayLine.s9; + sum.s6 += weights.sB * delayLine.sA; + sum.s6 += weights.sA * delayLine.sB; + sum.s6 += weights.s9 * delayLine.sC; + sum.s6 += weights.s8 * delayLine.sD; + sum.s6 += weights.s7 * delayLine.sE; + sum.s6 += weights.s6 * delayLine.sF; + sum.s6 += weights.s5 * delayLine.s0; + sum.s6 += weights.s4 * delayLine.s1; + sum.s6 += weights.s3 * delayLine.s2; + sum.s6 += weights.s2 * delayLine.s3; + sum.s6 += weights.s1 * delayLine.s4; + sum.s6 += weights.s0 * delayLine.s5; + (*filteredData)[station][pol][time + 6][channel][ri] = sum.s6; + + sum.s7 = weights.sF * delayLine.s7; + delayLine.s7 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 8][channel][pol_ri]); + sum.s7 += weights.sE * delayLine.s8; + sum.s7 += weights.sD * delayLine.s9; + sum.s7 += weights.sC * delayLine.sA; + sum.s7 += weights.sB * delayLine.sB; + sum.s7 += weights.sA * delayLine.sC; + sum.s7 += weights.s9 * delayLine.sD; + sum.s7 += weights.s8 * delayLine.sE; + sum.s7 += weights.s7 * delayLine.sF; + sum.s7 += weights.s6 * delayLine.s0; + sum.s7 += weights.s5 * delayLine.s1; + 
sum.s7 += weights.s4 * delayLine.s2; + sum.s7 += weights.s3 * delayLine.s3; + sum.s7 += weights.s2 * delayLine.s4; + sum.s7 += weights.s1 * delayLine.s5; + sum.s7 += weights.s0 * delayLine.s6; + (*filteredData)[station][pol][time + 7][channel][ri] = sum.s7; + + sum.s8 = weights.sF * delayLine.s8; + delayLine.s8 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 9][channel][pol_ri]); + sum.s8 += weights.sE * delayLine.s9; + sum.s8 += weights.sD * delayLine.sA; + sum.s8 += weights.sC * delayLine.sB; + sum.s8 += weights.sB * delayLine.sC; + sum.s8 += weights.sA * delayLine.sD; + sum.s8 += weights.s9 * delayLine.sE; + sum.s8 += weights.s8 * delayLine.sF; + sum.s8 += weights.s7 * delayLine.s0; + sum.s8 += weights.s6 * delayLine.s1; + sum.s8 += weights.s5 * delayLine.s2; + sum.s8 += weights.s4 * delayLine.s3; + sum.s8 += weights.s3 * delayLine.s4; + sum.s8 += weights.s2 * delayLine.s5; + sum.s8 += weights.s1 * delayLine.s6; + sum.s8 += weights.s0 * delayLine.s7; + (*filteredData)[station][pol][time + 8][channel][ri] = sum.s8; + + sum.s9 = weights.sF * delayLine.s9; + delayLine.s9 = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 10][channel][pol_ri]); + sum.s9 += weights.sE * delayLine.sA; + sum.s9 += weights.sD * delayLine.sB; + sum.s9 += weights.sC * delayLine.sC; + sum.s9 += weights.sB * delayLine.sD; + sum.s9 += weights.sA * delayLine.sE; + sum.s9 += weights.s9 * delayLine.sF; + sum.s9 += weights.s8 * delayLine.s0; + sum.s9 += weights.s7 * delayLine.s1; + sum.s9 += weights.s6 * delayLine.s2; + sum.s9 += weights.s5 * delayLine.s3; + sum.s9 += weights.s4 * delayLine.s4; + sum.s9 += weights.s3 * delayLine.s5; + sum.s9 += weights.s2 * delayLine.s6; + sum.s9 += weights.s1 * delayLine.s7; + sum.s9 += weights.s0 * delayLine.s8; + (*filteredData)[station][pol][time + 9][channel][ri] = sum.s9; + + sum.sA = weights.sF * delayLine.sA; + delayLine.sA = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 11][channel][pol_ri]); + sum.sA += weights.sE * 
delayLine.sB; + sum.sA += weights.sD * delayLine.sC; + sum.sA += weights.sC * delayLine.sD; + sum.sA += weights.sB * delayLine.sE; + sum.sA += weights.sA * delayLine.sF; + sum.sA += weights.s9 * delayLine.s0; + sum.sA += weights.s8 * delayLine.s1; + sum.sA += weights.s7 * delayLine.s2; + sum.sA += weights.s6 * delayLine.s3; + sum.sA += weights.s5 * delayLine.s4; + sum.sA += weights.s4 * delayLine.s5; + sum.sA += weights.s3 * delayLine.s6; + sum.sA += weights.s2 * delayLine.s7; + sum.sA += weights.s1 * delayLine.s8; + sum.sA += weights.s0 * delayLine.s9; + (*filteredData)[station][pol][time + 10][channel][ri] = sum.sA; + + sum.sB = weights.sF * delayLine.sB; + delayLine.sB = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 12][channel][pol_ri]); + sum.sB += weights.sE * delayLine.sC; + sum.sB += weights.sD * delayLine.sD; + sum.sB += weights.sC * delayLine.sE; + sum.sB += weights.sB * delayLine.sF; + sum.sB += weights.sA * delayLine.s0; + sum.sB += weights.s9 * delayLine.s1; + sum.sB += weights.s8 * delayLine.s2; + sum.sB += weights.s7 * delayLine.s3; + sum.sB += weights.s6 * delayLine.s4; + sum.sB += weights.s5 * delayLine.s5; + sum.sB += weights.s4 * delayLine.s6; + sum.sB += weights.s3 * delayLine.s7; + sum.sB += weights.s2 * delayLine.s8; + sum.sB += weights.s1 * delayLine.s9; + sum.sB += weights.s0 * delayLine.sA; + (*filteredData)[station][pol][time + 11][channel][ri] = sum.sB; + + sum.sC = weights.sF * delayLine.sC; + delayLine.sC = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 13][channel][pol_ri]); + sum.sC += weights.sE * delayLine.sD; + sum.sC += weights.sD * delayLine.sE; + sum.sC += weights.sC * delayLine.sF; + sum.sC += weights.sB * delayLine.s0; + sum.sC += weights.sA * delayLine.s1; + sum.sC += weights.s9 * delayLine.s2; + sum.sC += weights.s8 * delayLine.s3; + sum.sC += weights.s7 * delayLine.s4; + sum.sC += weights.s6 * delayLine.s5; + sum.sC += weights.s5 * delayLine.s6; + sum.sC += weights.s4 * delayLine.s7; + sum.sC += 
weights.s3 * delayLine.s8; + sum.sC += weights.s2 * delayLine.s9; + sum.sC += weights.s1 * delayLine.sA; + sum.sC += weights.s0 * delayLine.sB; + (*filteredData)[station][pol][time + 12][channel][ri] = sum.sC; + + sum.sD = weights.sF * delayLine.sD; + delayLine.sD = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 14][channel][pol_ri]); + sum.sD += weights.sE * delayLine.sE; + sum.sD += weights.sD * delayLine.sF; + sum.sD += weights.sC * delayLine.s0; + sum.sD += weights.sB * delayLine.s1; + sum.sD += weights.sA * delayLine.s2; + sum.sD += weights.s9 * delayLine.s3; + sum.sD += weights.s8 * delayLine.s4; + sum.sD += weights.s7 * delayLine.s5; + sum.sD += weights.s6 * delayLine.s6; + sum.sD += weights.s5 * delayLine.s7; + sum.sD += weights.s4 * delayLine.s8; + sum.sD += weights.s3 * delayLine.s9; + sum.sD += weights.s2 * delayLine.sA; + sum.sD += weights.s1 * delayLine.sB; + sum.sD += weights.s0 * delayLine.sC; + (*filteredData)[station][pol][time + 13][channel][ri] = sum.sD; + + sum.sE = weights.sF * delayLine.sE; + delayLine.sE = convert_float((*sampledData)[station][time + NR_TAPS - 1 + 15][channel][pol_ri]); + sum.sE += weights.sE * delayLine.sF; + sum.sE += weights.sD * delayLine.s0; + sum.sE += weights.sC * delayLine.s1; + sum.sE += weights.sB * delayLine.s2; + sum.sE += weights.sA * delayLine.s3; + sum.sE += weights.s9 * delayLine.s4; + sum.sE += weights.s8 * delayLine.s5; + sum.sE += weights.s7 * delayLine.s6; + sum.sE += weights.s6 * delayLine.s7; + sum.sE += weights.s5 * delayLine.s8; + sum.sE += weights.s4 * delayLine.s9; + sum.sE += weights.s3 * delayLine.sA; + sum.sE += weights.s2 * delayLine.sB; + sum.sE += weights.s1 * delayLine.sC; + sum.sE += weights.s0 * delayLine.sD; + (*filteredData)[station][pol][time + 14][channel][ri] = sum.sE; + + sum.sF = weights.sF * delayLine.sF; + sum.sF += weights.sE * delayLine.s0; + sum.sF += weights.sD * delayLine.s1; + sum.sF += weights.sC * delayLine.s2; + sum.sF += weights.sB * delayLine.s3; + sum.sF += 
weights.sA * delayLine.s4; + sum.sF += weights.s9 * delayLine.s5; + sum.sF += weights.s8 * delayLine.s6; + sum.sF += weights.s7 * delayLine.s7; + sum.sF += weights.s6 * delayLine.s8; + sum.sF += weights.s5 * delayLine.s9; + sum.sF += weights.s4 * delayLine.sA; + sum.sF += weights.s3 * delayLine.sB; + sum.sF += weights.s2 * delayLine.sC; + sum.sF += weights.s1 * delayLine.sD; + sum.sF += weights.s0 * delayLine.sE; + (*filteredData)[station][pol][time + 15][channel][ri] = sum.sF; + } +} diff --git a/RTCP/GPUProc/src/FIR.cl-0.ptx b/RTCP/GPUProc/src/FIR.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..d1a6fd1785efd5436cb75094fa168a1692b1643d Binary files /dev/null and b/RTCP/GPUProc/src/FIR.cl-0.ptx differ diff --git a/RTCP/GPUProc/src/FilterBank.cc b/RTCP/GPUProc/src/FilterBank.cc new file mode 100644 index 0000000000000000000000000000000000000000..beacf7cbcdf86a2243fda08c43abef4096a3b253 --- /dev/null +++ b/RTCP/GPUProc/src/FilterBank.cc @@ -0,0 +1,489 @@ +//# Always #include <lofar_config.h> first! +#include "lofar_config.h" + +#include "FilterBank.h" +#include "Common/LofarLogger.h" +#include "Interface/Align.h" +#include "Interface/Exceptions.h" + +#include <cmath> +#include <iostream> +#include <cstring> + +#if defined HAVE_FFTW3 +#include <fftw3.h> +#define fftw_real(x) ((x)[0]) +#define fftw_imag(x) ((x)[1]) +#elif defined HAVE_FFTW2 +#include <fftw.h> +#define fftw_real(x) (c_re(x)) +#define fftw_imag(x) (c_im(x)) +#else +#error Should have FFTW3 or FFTW2 installed +#endif + +namespace LOFAR { +namespace RTCP { + +#if USE_ORIGINAL_FILTER +#include <FIR_OriginalCepPPFWeights.h> +#endif + +// For documentation on this class, see the header file. 
+ +// Designs a new filter: stores the parameters and fills the weights array via generate_filter(). +FilterBank::FilterBank(bool verbose, unsigned taps, unsigned channels, WindowType windowType) +: + itsWindowType(windowType), itsNrTaps(taps), itsNrChannels(channels), itsVerbose(verbose), itsNegated(false) +{ + generate_filter(); +} + + +// Wraps existing weights: copies channels * taps floats from newWeights into the weights array. +// newWeights must therefore hold at least that many values. +FilterBank::FilterBank(bool verbose, unsigned taps, unsigned channels, float newWeights[]) +: + itsWindowType(PREDEFINED_FILTER), itsNrTaps(taps), itsNrChannels(channels), itsVerbose(verbose), itsNegated(false) +{ + weights.resize(boost::extents[itsNrChannels][itsNrTaps]); + memcpy(weights.origin(), newWeights, (itsNrChannels * itsNrTaps) * sizeof(float)); +} + + +// hamming window function +// Writes the n-point Hamming window into d[0..n-1]; n == 1 yields the single value 1.0. +void FilterBank::hamming(unsigned n, double d[]) +{ + if (n == 1) { + d[0] = 1.0; + return; + } + + unsigned m = n - 1; + + for (unsigned i = 0; i < n; i++) { + d[i] = 0.54 - 0.46 * cos((2.0 * M_PI * i) / m); + } +} + + +// blackman window function +// Writes the n-point Blackman window into d[0..n-1]; n == 1 yields the single value 1.0. +void FilterBank::blackman(unsigned n, double d[]) +{ + if (n == 1) { + d[0] = 1.0; + return; + } + + unsigned m = n - 1; + + for (unsigned i = 0; i < n; i++) { + // Convert before dividing: i / m on two unsigned operands truncates to 0 for every + // i < m (and is 1 for i == m), which made every sample evaluate to 0.42 - 0.5 + 0.08. + double k = static_cast<double>(i) / m; + d[i] = 0.42 - 0.5 * cos(2.0 * M_PI * k) + 0.08 * cos(4.0 * M_PI * k); + } +} + + +// Gaussian window function +// Writes n values into d: i runs from -(n - 1) to n - 1 in steps of 2, and a scales the exponent. +void FilterBank::gaussian(int n, double a, double d[]) +{ + int index = 0; + + for (int i = -(n - 1); i <= n - 1; i += 2) { + d[index++] = exp(-0.5 * pow((a / n * i), 2)); + } +} + + +// Compute the modified Bessel function I_0(x) for any real x. +// This method was taken from the ROOT package, See http://root.cern.ch/root. 
+// It was released under the GNU LESSER GENERAL PUBLIC LICENSE Version 2.1 +// Uses one polynomial for |x| < 3.75 and a second one, scaled by exp(|x|) / sqrt(|x|), otherwise. +double FilterBank::besselI0(double x) +{ + // Parameters of the polynomial approximation + const double p1 = 1.0, p2 = 3.5156229, p3 = 3.0899424, p4 = 1.2067492, p5 = 0.2659732, p6 = 3.60768e-2, p7 = 4.5813e-3; + + const double q1 = 0.39894228, q2 = 1.328592e-2, q3 = 2.25319e-3, q4 = -1.57565e-3, q5 = 9.16281e-3, q6 = -2.057706e-2, q7 = 2.635537e-2, q8 = -1.647633e-2, + q9 = 3.92377e-3; + + const double k1 = 3.75; + // std::fabs, not unqualified abs(): depending on the standard library, abs(x) can bind to + // the C function int abs(int), which truncates x and selects the wrong branch below. + double ax = std::fabs(x); + + double y = 0, result = 0; + + if (ax < k1) { + double xx = x / k1; + y = xx * xx; + result = p1 + y * (p2 + y * (p3 + y * (p4 + y * (p5 + y * (p6 + y * p7))))); + } else { + y = k1 / ax; + result = (exp(ax) / sqrt(ax)) * (q1 + y * (q2 + y * (q3 + y * (q4 + y * (q5 + y * (q6 + y * (q7 + y * (q8 + y * q9)))))))); + } + + return result; +} + + +// Kaiser window function +// Writes the n-point Kaiser window with shape parameter beta into d[0..n-1]; n == 1 yields 1.0. +void FilterBank::kaiser(int n, double beta, double d[]) +{ + if (n == 1) { + d[0] = 1.0; + return; + } + + int m = n - 1; + + // The denominator does not depend on i, so evaluate it once. + const double norm = besselI0(beta); + + for (int i = 0; i < n; i++) { + // Form the product in double so that i * (m - i) cannot overflow int for large n. + double k = 2.0 * beta / m * sqrt(static_cast<double>(i) * (m - i)); + d[i] = besselI0(k) / norm; + } +} + + +// One-dimensional interpolation. Interpolate Y, defined at the points X, +// at N evenly spaced points between 0 and 1. 
The sample points X must be strictly monotonic +void FilterBank::interpolate(const double x[], const double y[], unsigned xlen, unsigned n, double result[]) +{ + unsigned nextX = 0; + unsigned index = 0; + + for (double interpolatedX = 0; interpolatedX <= 1.0; interpolatedX += 1.0 / (n - 1), index++) { + while (x[nextX] <= interpolatedX && nextX < xlen - 1) + nextX++; + + if (nextX == 0) { + LOG_ERROR("ERROR in FilterBank::interpolate"); + } + + double prevXVal = x[nextX - 1]; + double nextXVal = x[nextX]; + double prevYVal = y[nextX - 1]; + double nextYVal = y[nextX]; + + double rc = (nextYVal - prevYVal) / (nextXVal - prevXVal); + + double newVal = prevYVal + (interpolatedX - prevXVal) * rc; + result[index] = newVal; + } +} + + +// Compute the filter, similar to Octave's fir2(n, f, m, grid_n, ramp_n, window); +// Window and result must be of size n+1. +// grid_n: length of ideal frequency response function +// ramp_n: transition width for jumps in filter response +// defaults to grid_n/20; a wider ramp gives wider transitions +// but has better stopband characteristics. +void FilterBank::generate_fir_filter(unsigned n, double w, const double window[], double result[]) +{ + // make sure grid is big enough for the window + // the grid must be at least (n+1)/2 + // for all filters where the order is a power of two minus 1, grid_n = n+1; + unsigned grid_n = nextPowerOfTwo(n + 1); + + unsigned ramp_n = 2; // grid_n/20; + + // Apply ramps to discontinuities + // this is a low pass filter + // maybe we can omit the "w, 0" point? + // I did observe a small difference + double f[] = { 0.0, w - ramp_n / grid_n / 2.0, w, w + ramp_n / grid_n / 2.0, 1.0 }; + double m[] = { 1.0, 1.0, 0.0, 0.0, 0.0 }; + + // grid is a 1-D array with grid_n+1 points. 
Values are 1 in filter passband, 0 otherwise + std::vector<double> grid(grid_n + 1); + + // interpolate between grid points + interpolate(f, m, 5 /* length of f and m arrays */, grid_n + 1, &grid[0]); + +#if 0 + std::stringstream logStr; + logStr << "interpolated = ["; + for(unsigned i=0; i<grid_n+1; i++) { + logStr << grid[i]; + if(i != grid_n+1-1) logStr << ", "; + } + logStr << "];"; + LOG_DEBUG(logStr.str()); +#endif + + // the grid we do an ifft on is: + // grid appended with grid_n*2 zeros + // appended with original grid values from indices grid_n..2, i.e., the values in reverse order + // (note, arrays start at 1 in octave!) + // the input for the ifft is of size 4*grid_n + // input = [grid ; zeros(grid_n*2,1) ;grid(grid_n:-1:2)]; + +#if defined HAVE_FFTW3 + fftwf_complex* cinput = (fftwf_complex*) fftwf_malloc(grid_n * 4 * sizeof(fftwf_complex)); + fftwf_complex* coutput = (fftwf_complex*) fftwf_malloc(grid_n * 4 * sizeof(fftwf_complex)); +#elif defined HAVE_FFTW2 + fftw_complex* cinput = (fftw_complex*) fftw_malloc(grid_n*4*sizeof(fftw_complex)); + fftw_complex* coutput = (fftw_complex*) fftw_malloc(grid_n*4*sizeof(fftw_complex)); +#endif + + if (cinput == NULL || coutput == NULL) { + THROW(GPUProcException, "cannot allocate buffers"); + } + + // wipe imaginary part + for (unsigned i = 0; i < grid_n * 4; i++) { + fftw_imag(cinput[i]) = 0.0; + } + + // copy first part of grid + for (unsigned i = 0; i < grid_n + 1; i++) { + fftw_real(cinput[i]) = grid[i]; + } + + // append zeros + for (unsigned i = grid_n + 1; i <= grid_n * 3; i++) { + fftw_real(cinput[i]) = 0.0; + } + + // now append the grid in reverse order + for (unsigned i = grid_n - 1, index = 0; i >= 1; i --, index ++) { + fftw_real(cinput[grid_n * 3 + 1 + index]) = grid[i]; + } + +#if 0 + std::stringstream logStr; + logStr << "ifft_in = ["; + for(unsigned i=0; i<grid_n*4; i++) { + logStr << fftw_real(cinput[i]) << " " << fftw_imag(cinput[i]); + if(i != grid_n*4-1) logStr << ", "; + } + logStr << 
"];"; + LOG_DEBUG(logStr.str()); +#endif + +#if defined HAVE_FFTW3 + fftwf_plan plan = fftwf_plan_dft_1d(grid_n * 4, cinput, coutput, FFTW_BACKWARD, FFTW_ESTIMATE); + fftwf_execute(plan); +#elif defined HAVE_FFTW2 + fftw_plan plan = fftw_create_plan(grid_n * 4, FFTW_BACKWARD, FFTW_ESTIMATE); + fftw_one(plan, cinput, coutput); +#endif + +#if 0 + for(unsigned i=0; i<grid_n*4; i++) { + LOG_DEBUG_STR("ifft result [" << i << "] = " << fftw_real(coutput[i]) << " " << fftw_imag(coutput[i])); + } +#endif + + // half end + // 1 2 n+1 2(n+1) 3(n+1) 4(n+1) + // x x x x x x x x x # last quarter + // x x x x x x # first quarter + + // last_quarter = b([end-n+1:2:end]); # the size is only 1/8, since we skip half of the elements + // first_quarter = b(2:2:(n+1)); # the size is only 1/8, since we skip half of the elements + + unsigned index = 0; + + for (unsigned i = 4 * grid_n - n; i < 4 * grid_n; i += 2) { + result[index] = fftw_real(coutput[i]); + index++; + } + + for (unsigned i = 1; i <= n; i += 2) { + result[index] = fftw_real(coutput[i]); + index++; + } + +#if defined HAVE_FFTW3 + fftwf_destroy_plan(plan); + fftwf_free(cinput); + fftwf_free(coutput); +#elif defined HAVE_FFTW2 + fftw_destroy_plan(plan); + fftw_free(cinput); + fftw_free(coutput); +#endif + + // multiply with window + for (unsigned i = 0; i <= n; i++) { + result[i] *= window[i]; + } + + // normalize + double factor = result[n / 2]; + for (unsigned i = 0; i <= n; i++) { + result[i] /= factor; + } + +#if 0 + std::stringstream logStr; + logStr << "result = ["; + for(unsigned i=0; i<=n; i++) { + logStr << result[i]; + if(i != n) logStr << ", "; + } + logStr << "];"; + LOG_DEBUG(logStr.str()); +#endif +} + + +#if ! USE_ORIGINAL_FILTER +// This method initializes the weights array. 
+void FilterBank::generate_filter() +{ + unsigned n = itsNrChannels * itsNrTaps; + + std::stringstream logStr; + + if (itsVerbose) { + logStr << "generating FIR filter bank with " << itsNrChannels << " channels and " << itsNrTaps << " taps (" << n << " total), using a "; + } + + std::vector<double> window(n); + + switch (itsWindowType) { + case HAMMING: { + // Use a n-point Hamming window. + if (itsVerbose) { + logStr << "Hamming window"; + LOG_DEBUG(logStr.str()); + } + hamming(n, &window[0]); + break; + } + case BLACKMAN: { + // Use a n-point Blackman window. + if (itsVerbose) { + logStr << "Blackman window"; + LOG_DEBUG(logStr.str()); + } + blackman(n, &window[0]); + break; + } + case GAUSSIAN: { + // Use a n-point Gaussian window. + double alpha = 3.5; + if (itsVerbose) { + logStr << "Gaussian window with alpha = " << alpha; + LOG_DEBUG(logStr.str()); + } + gaussian(n, alpha, &window[0]); + break; + } + case KAISER: { + // Use a n-point Kaiser window. + // The beta parameter is found in matlab / octave with + // [n,Wn,bta,filtype]=kaiserord([fsin/channels 1.4*fsin/channels],[1 0],[10^(0.5/20) 10^(-91/20)],fsin); + // where fsin is the sample freq + double beta = 9.0695; + if (itsVerbose) { + logStr << "Kaiser window with beta = " << beta; + LOG_DEBUG(logStr.str()); + } + kaiser(n, beta, &window[0]); + break; + } + default: + THROW(GPUProcException, "unknown window type"); + } + +#if 0 + std::stringstream logStr; + logStr << "window = ["; + for(unsigned i=0; i<n; i++) { + logStr << window[i]; + if(i != n-1) logStr << ", "; + } + logStr << "];"; + LOG_DEBUG(logStr.str()); +#endif + + std::vector<double> result(n); + + generate_fir_filter(n - 1, 1.0 / itsNrChannels, &window[0], &result[0]); + + weights.resize(boost::extents[itsNrChannels][itsNrTaps]); + + unsigned index = 0; + for (int tap = itsNrTaps - 1; tap >= 0; tap--) { // store the taps in reverse! + for (unsigned channel = 0; channel < itsNrChannels; channel++) { + // Correct total power. 
+ // we use the 256 channel case as a reference, so we + // multiply by 256, and divide by the number of channels + weights[channel][tap] = result[index] * 256.0 / itsNrChannels; + index++; + } + } + +#if 0 + LOG_DEBUG("final taps: "); + std::stringstream logStr; + for(unsigned channel=0; channel<itsNrChannels; channel++) { + logStr << "channel: " << channel << "| "; + for(unsigned tap=0; tap<itsNrTaps; tap++) { + logStr << " " << weights[channel][tap]; + } + LOG_DEBUG(logStr.str()); + } +#endif +} + +#else // USE_ORIGINAL_FILTER +// This method initializes the weights array. +void FilterBank::generate_filter() +{ + if(itsVerbose) { + LOG_DEBUG("using original static 256 channel FIR filter bank"); + } + + if(itsNrTaps != 16 || itsNrChannels != 256) { + THROW(GPUProcException, "not supported!"); + } + weights.resize(boost::extents[itsNrChannels][itsNrTaps]); + memcpy(weights.origin(), origWeights, (itsNrChannels * itsNrTaps) * sizeof(float)); + itsNegated = true; +} +#endif // USE_ORIGINAL_FILTER +// In CEP, the first subband is from -98 KHz to 98 KHz, rather than from 0 to 195 KHz. +// To avoid that the FFT outputs the channels in the wrong order (from 128 to +// 255 followed by channels 0 to 127), we multiply each second FFT input by -1. +// This is efficiently achieved by negating the FIR filter constants of all +// uneven FIR filters. +void FilterBank::negateWeights() +{ + for (int tap = itsNrTaps - 1; tap >= 0; tap--) { // store the taps in reverse! + // Negate all odd channels + for (unsigned channel = 1; channel < itsNrChannels; channel += 2) { + weights[channel][tap] = -weights[channel][tap]; + } + } + itsNegated = !itsNegated; +} + + +// Used for debugging. 
+void FilterBank::reverseTaps() +{ + for (unsigned channel = 0; channel < itsNrChannels; channel++) { + for (unsigned tap = 0; tap < itsNrTaps/2; tap++) { + float tmp = weights[channel][itsNrTaps - tap - 1]; + weights[channel][itsNrTaps - tap - 1] = weights[channel][tap]; + weights[channel][tap] = tmp; + } + } +} + + +// Print the weights array in the natural order, in a format that can be read by gnuplot. +void FilterBank::printWeights() +{ + cout << (itsNegated ? "NEGATED" : "NORMAL(NOT NEGATED)") << endl; + for (int tap = itsNrTaps - 1; tap >= 0; tap--) { // taps are stored in reverse! + for (unsigned channel = 0; channel < itsNrChannels; channel++) { + if (itsNegated && channel % 2 != 0) { + cout << -weights[channel][tap] << endl; // odd channels are negated + } else { + cout << weights[channel][tap] << endl; + } + } + } +} + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/FilterBank.h b/RTCP/GPUProc/src/FilterBank.h new file mode 100644 index 0000000000000000000000000000000000000000..1815bfeb3aa7506aaff5f1414070cb5b0b67ec05 --- /dev/null +++ b/RTCP/GPUProc/src/FilterBank.h @@ -0,0 +1,114 @@ +#ifndef LOFAR_CNPROC_FILTER_BANK_H +#define LOFAR_CNPROC_FILTER_BANK_H + +#define USE_ORIGINAL_FILTER 0 + +#if 0 || !defined HAVE_BGP +#define FIR_C_IMPLEMENTATION +#endif + +#include <boost/multi_array.hpp> + +namespace LOFAR { +namespace RTCP { + +enum WindowType { HAMMING, BLACKMAN, GAUSSIAN, KAISER, PREDEFINED_FILTER }; + +// Note that the filter tap constants for a channel are in reverse order. +// This makes the implementation more efficient. + +class FilterBank +{ + public: + + // This constructor designs a new filter with the specified parameters, and initializes the weights array. + FilterBank(bool verbose, unsigned taps, unsigned channels, WindowType windowType); + + // This constructor creates a filterbank from an already existing set of weights. 
+ FilterBank(bool verbose, unsigned taps, unsigned channels, float *weights); + + unsigned getNrTaps(); + + float *getWeights(unsigned channel); + const boost::multi_array<float, 2> &getWeights() const; // [nrChannels][taps]; + + // In CEP, the first subband is from -98 KHz to 98 KHz, rather than from 0 to 195 KHz. + // To avoid that the FFT outputs the channels in the wrong order (from 128 to + // 255 followed by channels 0 to 127), we multiply each second FFT input by -1. + // This is efficiently achieved by negating the FIR filter constants of all + // uneven FIR filters. + void negateWeights(); + + bool isNegated(); + + // Used for debugging. + void reverseTaps(); + + // Print the weights array in the natural order, in a format that can be read by gnuplot. + void printWeights(); + +private: + // Hamming window function + void hamming(unsigned n, double d[]); + + // Blackman window function + void blackman(unsigned n, double d[]); + + // Gaussian window function + void gaussian(int n, double a, double d[]); + + // Kaiser window function + void kaiser(int n, double beta, double d[]); + + // helper functions + double besselI0(double x); + void interpolate(const double x[], const double y[], unsigned xlen, unsigned n, double result[]); + void generate_fir_filter(unsigned n, double w, const double window[], double result[]); + void generate_filter(); + + + // The window used for generating the filter, default is KAISER. + WindowType itsWindowType; + + const unsigned itsNrTaps; + const unsigned itsNrChannels; + const bool itsVerbose; + bool itsNegated; + + // Store the weights in a multiarray, since both the number of channels are not known at compile time. 
+ boost::multi_array<float, 2> weights; // [nrChannels][taps]; + +#if USE_ORIGINAL_FILTER + static const float originalCepPPFWeights[256][16]; +#endif + +}; + + +inline unsigned FilterBank::getNrTaps() +{ + return itsNrTaps; +} + + +inline float *FilterBank::getWeights(unsigned channel) +{ + return weights[channel].origin(); +} + + +inline const boost::multi_array<float, 2> &FilterBank::getWeights() const +{ + return weights; +} + + +inline bool FilterBank::isNegated() +{ + return itsNegated; +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/InputSection.cc b/RTCP/GPUProc/src/InputSection.cc new file mode 100644 index 0000000000000000000000000000000000000000..4fb852e78a12cd190fec3c80165b2d0c7e24758f --- /dev/null +++ b/RTCP/GPUProc/src/InputSection.cc @@ -0,0 +1,121 @@ +//# InputSection.cc: Catch RSP ethernet frames and synchronize RSP inputs +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: InputSection.cc 17893 2011-04-29 09:04:10Z romein $ + +//# Always #include <lofar_config.h> first! 
+#include <lofar_config.h> + +//# Includes +#include <InputSection.h> +#include <Stream/SocketStream.h> +#include <Interface/Stream.h> + +#include <boost/format.hpp> +using boost::format; + +namespace LOFAR { +namespace RTCP { + + +template<typename SAMPLE_TYPE> InputSection<SAMPLE_TYPE>::InputSection(const Parset &parset, unsigned psetNumber) +{ + std::vector<Parset::StationRSPpair> inputs = parset.getStationNamesAndRSPboardNumbers(psetNumber); + string stationName = inputs.size() > 0 ? inputs[0].station : "none"; // TODO: support more than one station + itsNrRSPboards = inputs.size(); + + itsLogPrefix = str(format("[station %s] ") % stationName); + + itsBeamletBuffers.resize(itsNrRSPboards); + + for (unsigned rsp = 0; rsp < itsNrRSPboards; rsp ++) + itsBeamletBuffers[rsp] = new BeamletBuffer<SAMPLE_TYPE>(&parset, inputs[rsp].station, inputs[rsp].rsp); + + createInputStreams(parset, inputs); + createInputThreads(parset, inputs); +} + + +template<typename SAMPLE_TYPE> InputSection<SAMPLE_TYPE>::~InputSection() +{ + LOG_DEBUG_STR(itsLogPrefix << "InputSection::~InputSection()"); +} + + +template<typename SAMPLE_TYPE> void InputSection<SAMPLE_TYPE>::createInputStreams(const Parset &parset, const std::vector<Parset::StationRSPpair> &inputs) +{ + itsInputStreams.resize(itsNrRSPboards); + + for (unsigned i = 0; i < itsNrRSPboards; i ++) { + const std::string &station = inputs[i].station; + unsigned rsp = inputs[i].rsp; + std::string streamName = parset.getInputStreamName(station, rsp); + + LOG_DEBUG_STR(itsLogPrefix << "input " << i << ": RSP board " << rsp << ", reads from \"" << streamName << '"'); + +#if 0 + if (station != inputs[0].station) + THROW(GPUProcException, "inputs from multiple stations on one I/O node not supported (yet)"); +#endif + + itsInputStreams[i] = createStream(streamName, true); + + SocketStream *sstr = dynamic_cast<SocketStream *>(itsInputStreams[i].get()); + + if (sstr != 0) + sstr->setReadBufferSize(8 * 1024 * 1024); // stupid kernel 
multiplies this by 2 + } +} + + +template<typename SAMPLE_TYPE> void InputSection<SAMPLE_TYPE>::createInputThreads(const Parset &parset, const std::vector<Parset::StationRSPpair> &inputs) +{ + itsLogThread = new LogThread(itsNrRSPboards, inputs.size() > 0 ? inputs[0].station : "none"); + + /* start up thread which writes RSP data from ethernet link + into cyclic buffers */ + + typename InputThread<SAMPLE_TYPE>::ThreadArgs args; + + args.nrTimesPerPacket = parset.getInt32("OLAP.nrTimesInFrame"); + args.nrSlotsPerPacket = parset.nrSlotsInFrame(); + args.isRealTime = parset.realTime(); + args.startTime = TimeStamp(static_cast<int64>(parset.startTime() * parset.subbandBandwidth()), parset.clockSpeed()); + + itsInputThreads.resize(itsNrRSPboards); + + for (unsigned thread = 0; thread < itsNrRSPboards; thread ++) { + args.threadID = thread; + args.stream = itsInputStreams[thread]; + args.BBuffer = itsBeamletBuffers[thread]; + args.packetCounters = &itsLogThread->itsCounters[thread]; + args.logPrefix = str(format("[station %s board %s] ") % inputs[thread].station % inputs[thread].rsp); + + itsInputThreads[thread] = new InputThread<SAMPLE_TYPE>(args); + } +} + + +template class InputSection<i4complex>; +template class InputSection<i8complex>; +template class InputSection<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/InputSection.h b/RTCP/GPUProc/src/InputSection.h new file mode 100644 index 0000000000000000000000000000000000000000..df05a3bc0ec2da383b16557a7a701d8f76d877a2 --- /dev/null +++ b/RTCP/GPUProc/src/InputSection.h @@ -0,0 +1,71 @@ +//# InputSection.h: Catch RSP ethernet frames and synchronize RSP inputs +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free 
Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: InputSection.h 17893 2011-04-29 09:04:10Z romein $ + +#ifndef LOFAR_GPUPROC_INPUTSECTION_H +#define LOFAR_GPUPROC_INPUTSECTION_H + +// \file +// Catch RSP ethernet frames and synchronize RSP inputs + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +//# Includes +#include <Interface/Parset.h> +#include <Interface/SmartPtr.h> +#include <Stream/Stream.h> +#include <BeamletBuffer.h> +#include <InputThread.h> +#include <LogThread.h> + +#include <boost/multi_array.hpp> +#include <pthread.h> + + +namespace LOFAR { +namespace RTCP { + +template <typename SAMPLE_TYPE> class InputSection +{ + public: + InputSection(const Parset &, unsigned psetNumber); + ~InputSection(); + + std::vector<SmartPtr<BeamletBuffer<SAMPLE_TYPE> > > itsBeamletBuffers; + + private: + void createInputStreams(const Parset &, const std::vector<Parset::StationRSPpair> &inputs); + void createInputThreads(const Parset &, const std::vector<Parset::StationRSPpair> &inputs); + + std::string itsLogPrefix; + + std::vector<SmartPtr<Stream > > itsInputStreams; + + unsigned itsNrRSPboards; + + SmartPtr<LogThread> itsLogThread; + std::vector<SmartPtr<InputThread<SAMPLE_TYPE> > > itsInputThreads; +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/InputThread.cc b/RTCP/GPUProc/src/InputThread.cc new file mode 100644 index 
0000000000000000000000000000000000000000..407a96c00d34dd0d72f4ab2ae2b00d3bd68e9fcc --- /dev/null +++ b/RTCP/GPUProc/src/InputThread.cc @@ -0,0 +1,194 @@ +//# InputThread.cc: the thread that reads from a Stream and places data into +//# the buffer of the input section +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: InputThread.cc 17975 2011-05-10 09:52:51Z mol $ + +//# Always #include <lofar_config.h> first! +#include <lofar_config.h> + +//# Includes +#include <Common/DataConvert.h> +#include <Common/LofarLogger.h> +#include <Common/SystemCallException.h> +#include <Common/Timer.h> +#include <Interface/AlignedStdAllocator.h> +#include <Interface/Exceptions.h> +#include <Stream/NullStream.h> +#include <Stream/SocketStream.h> +#include <BeamletBuffer.h> +#include <InputThread.h> +#include <RSP.h> +#include <Scheduling.h> +#include <Common/Thread/Cancellation.h> + +#include <cstddef> + +#include <boost/multi_array.hpp> + + +namespace LOFAR { +namespace RTCP { + + +template <typename SAMPLE_TYPE> InputThread<SAMPLE_TYPE>::InputThread(ThreadArgs args /* call by value! 
*/) +: + itsArgs(args), + itsThread(this, &InputThread<SAMPLE_TYPE>::mainLoop, itsArgs.logPrefix + "[InputThread] ", 65536) +{ + LOG_DEBUG_STR(itsArgs.logPrefix << "InputThread::InputThread(...)"); +} + + +template <typename SAMPLE_TYPE> InputThread<SAMPLE_TYPE>::~InputThread() +{ + LOG_DEBUG_STR(itsArgs.logPrefix << "InputThread::~InputThread()"); + itsThread.cancel(); +} + + +template <typename SAMPLE_TYPE> void InputThread<SAMPLE_TYPE>::mainLoop() +{ +#if 1 && defined HAVE_BGP_ION + if (itsArgs.threadID == 0) + runOnCore0(); + else + doNotRunOnCore0(); +#endif + + const unsigned maxNrPackets = 128; + TimeStamp actualstamp = itsArgs.startTime - itsArgs.nrTimesPerPacket; + unsigned packetSize = sizeof(struct RSP::Header) + itsArgs.nrSlotsPerPacket * itsArgs.nrTimesPerPacket * NR_POLARIZATIONS * sizeof(SAMPLE_TYPE); + + std::vector<TimeStamp> timeStamps(maxNrPackets); + boost::multi_array<char, 2, AlignedStdAllocator<char, 32> > packets(boost::extents[maxNrPackets][packetSize]); + //boost::multi_array<char, 2, FlatMemoryAllocator<char> > packets(boost::extents[maxNrPackets][packetSize]); + + char *currentPacketPtr = packets.origin(); + unsigned currentPacket = 0; + + unsigned previousSeqid = 0; + bool previousSeqidIsAccepted = false; + + bool dataShouldContainValidStamp = dynamic_cast<NullStream *>(itsArgs.stream) == 0; + bool isUDPstream = dynamic_cast<SocketStream *>(itsArgs.stream) != 0 && dynamic_cast<SocketStream *>(itsArgs.stream)->protocol == SocketStream::UDP; + WallClockTime wallClockTime; + + LOG_DEBUG_STR(itsArgs.logPrefix << " input thread " << itsArgs.threadID << " entering loop"); + + while (true) { + try { + // cancelable read, to allow stopping this thread even if the station + // does not send data + + if (isUDPstream) { + if (itsArgs.stream->tryRead(currentPacketPtr, packetSize) != packetSize) { + ++ itsArgs.packetCounters->received; + ++ itsArgs.packetCounters->badSize; + continue; + } + } else { + Cancellation::point(); // allow cancellation 
from null: + itsArgs.stream->read(currentPacketPtr, packetSize); + } + } catch (Stream::EndOfStreamException &) { + break; + } catch (SystemCallException &ex) { + if (ex.error == EINTR) + break; + else + throw; + } + + ++ itsArgs.packetCounters->received; + + if (dataShouldContainValidStamp) { +#if defined __PPC__ + unsigned seqid, blockid; + + asm volatile ("lwbrx %0,%1,%2" : "=r" (seqid) : "b" (currentPacketPtr), "r" (offsetof(RSP, header.timestamp))); + asm volatile ("lwbrx %0,%1,%2" : "=r" (blockid) : "b" (currentPacketPtr), "r" (offsetof(RSP, header.blockSequenceNumber))); +#else + unsigned seqid = reinterpret_cast<RSP *>(currentPacketPtr)->header.timestamp; + unsigned blockid = reinterpret_cast<RSP *>(currentPacketPtr)->header.blockSequenceNumber; + +#if defined WORDS_BIGENDIAN + seqid = byteSwap(seqid); + blockid = byteSwap(blockid); +#endif +#endif + + //if the seconds counter is 0xFFFFFFFF, the data cannot be trusted. + if (seqid == ~0U) { + ++ itsArgs.packetCounters->badTimeStamp; + continue; + } + + // Sanity check on seqid. Note, that seqid is in seconds, + // so a value which is greater than the previous one with more + // than (say) 10 seconds probably means that the sequence number + // in the packet is wrong. This can happen, since communication is not + // reliable. 
+ if (seqid >= previousSeqid + 10 && previousSeqidIsAccepted) { + previousSeqidIsAccepted = false; + ++ itsArgs.packetCounters->badTimeStamp; + continue; + } + + // accept seqid + previousSeqidIsAccepted = true; + previousSeqid = seqid; + + actualstamp.setStamp(seqid, blockid); + } else { + actualstamp += itsArgs.nrTimesPerPacket; + + if (itsArgs.isRealTime) + wallClockTime.waitUntil(actualstamp); + } + + // expected packet received so write data into corresponding buffer + //itsArgs.BBuffer->writePacketData(reinterpret_cast<SAMPLE_TYPE *>(&packet.data), actualstamp); + + timeStamps[currentPacket] = actualstamp; + currentPacketPtr += packetSize; + + if (++ currentPacket == maxNrPackets) { + itsArgs.BBuffer->writeMultiplePackets(packets.origin(), timeStamps); + // pthread_yield(); + currentPacket = 0; + currentPacketPtr = packets.origin(); + } + } + + timeStamps.resize(currentPacket); + itsArgs.BBuffer->writeMultiplePackets(packets.origin(), timeStamps); + itsArgs.BBuffer->noMoreWriting(); + + LOG_DEBUG_STR(itsArgs.logPrefix << "InputThread::mainLoop() exiting"); +} + + +template class InputThread<i4complex>; +template class InputThread<i8complex>; +template class InputThread<i16complex>; + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/InputThread.h b/RTCP/GPUProc/src/InputThread.h new file mode 100644 index 0000000000000000000000000000000000000000..fa597e84b47a70334897c8b83834096ec1e2bd51 --- /dev/null +++ b/RTCP/GPUProc/src/InputThread.h @@ -0,0 +1,75 @@ +//# InputThread.h: The thread that reads from a TH and places data into the buffer of the input section +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# 
(at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: InputThread.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_INPUTTHREAD_H +#define LOFAR_GPUPROC_INPUTTHREAD_H + +// \file +// The thread that reads from a Stream and places data into the buffer of the input section + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +//# Includes +#include <Common/lofar_vector.h> +#include <Interface/RSPTimeStamp.h> +#include <Common/Thread/Thread.h> + +#include <BeamletBuffer.h> +#include <LogThread.h> + + +namespace LOFAR { +namespace RTCP { + +template<typename SAMPLE_TYPE> class InputThread +{ + public: + struct ThreadArgs { + BeamletBuffer<SAMPLE_TYPE> *BBuffer; + Stream *stream; + + unsigned threadID; + unsigned nrTimesPerPacket; + unsigned nrSlotsPerPacket; + LogThread::Counters *packetCounters; + bool isRealTime; + TimeStamp startTime; + + std::string logPrefix; + }; + + InputThread(ThreadArgs args); + ~InputThread(); + + void mainLoop(); + + static const unsigned packetBuffersSize = 128; + + private: + ThreadArgs itsArgs; + Thread itsThread; +}; + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/Job.cc b/RTCP/GPUProc/src/Job.cc new file mode 100644 index 0000000000000000000000000000000000000000..df6eb11101fbe52c90fc47e54f5ffc4011e81e86 --- /dev/null +++ b/RTCP/GPUProc/src/Job.cc @@ -0,0 +1,905 @@ +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it 
and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: ION_main.cc 15296 2010-03-24 10:19:41Z romein $ + +#include <lofar_config.h> + +#include <BeamletBufferToComputeNode.h> +#include <ControlPhase3Cores.h> +#include <Common/LofarLogger.h> +#include <Interface/CN_Command.h> +#include <Interface/Exceptions.h> +#include <Interface/OutputTypes.h> +#include <Interface/PrintVector.h> +#include <Interface/RSPTimeStamp.h> +#include <InputSection.h> +#include <ION_Allocator.h> +#include <ION_main.h> +#include <Job.h> +#include <OutputSection.h> +#include <StreamMultiplexer.h> +#include <Stream/SocketStream.h> + +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> + +#include <boost/format.hpp> + + +#define LOG_CONDITION (myPsetNumber == 0) + +namespace LOFAR { +namespace RTCP { + +unsigned Job::nextJobID = 1; +void *Job::theInputSection; +Mutex Job::theInputSectionMutex; +unsigned Job::theInputSectionRefCount = 0; + +Queue<Job *> finishedJobs; + + +Job::Job(const char *parsetName) +: + itsParset(parsetName), + itsJobID(nextJobID ++), // no need to make thread safe + itsObservationID(itsParset.observationID()), + itsIsRunning(false), + itsDoCancel(false), + itsBlockNumber(0), + itsRequestedStopTime(0.0), + itsStopTime(0.0) +{ + itsLogPrefix = str(boost::format("[obs %d] ") % 
itsParset.observationID()); + + if (LOG_CONDITION) { + LOG_INFO_STR(itsLogPrefix << "----- Creating new job"); + LOG_DEBUG_STR(itsLogPrefix << "usedCoresInPset = " << itsParset.usedCoresInPset()); + + // Handle PVSS (CEPlogProcessor) communication + if (itsParset.PVSS_TempObsName() != "") + LOG_INFO_STR(itsLogPrefix << "PVSS name: " << itsParset.PVSS_TempObsName()); + } + + + // Handle PLC communication + if (myPsetNumber == 0) { + if (itsParset.PLC_controlled()) { + // let the ApplController decide what we should do + try { + itsPLCStream = new SocketStream(itsParset.PLC_Host().c_str(), itsParset.PLC_Port(), SocketStream::TCP, SocketStream::Server, 60); + + itsPLCClient = new PLCClient(*itsPLCStream, *this, itsParset.PLC_ProcID(), itsObservationID); + } catch (Exception &ex) { + LOG_WARN_STR(itsLogPrefix << "Could not connect to ApplController on " << itsParset.PLC_Host() << ":" << itsParset.PLC_Port() << " as " << itsParset.PLC_ProcID() << " -- continuing on autopilot: " << ex); + } + } + } + + // check enough parset settings just to get to the coordinated check in jobThread safely + if (itsParset.CNintegrationTime() <= 0) + THROW(IONProcException,"CNintegrationTime must be bigger than 0"); + + // synchronize roughly every 5 seconds to see if the job is cancelled + itsNrBlockTokensPerBroadcast = static_cast<unsigned>(ceil(5.0 / itsParset.CNintegrationTime())); + itsNrBlockTokens = 1; // trigger a rendez-vous immediately to sync latest stoptime info + + itsHasPhaseOne = itsParset.phaseOnePsetIndex(myPsetNumber) >= 0; + itsHasPhaseTwo = itsParset.phaseTwoPsetIndex(myPsetNumber) >= 0; + itsHasPhaseThree = itsParset.phaseThreePsetIndex(myPsetNumber) >= 0; + + itsJobThread = new Thread(this, &Job::jobThread, itsLogPrefix + "[JobThread] ", 65536); +} + + +Job::~Job() +{ + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Job " << (itsIsRunning ? 
"finished" : "cancelled") << " successfully"); +} + + +void Job::createIONstreams() +{ + if (myPsetNumber == 0) { + std::vector<unsigned> involvedPsets = itsParset.usedPsets(); + + for (unsigned i = 0; i < involvedPsets.size(); i ++) { + ASSERT(involvedPsets[i] < allIONstreamMultiplexers.size()); + + if (involvedPsets[i] != 0) // do not send to itself + itsIONstreams.push_back(new MultiplexedStream(*allIONstreamMultiplexers[involvedPsets[i]], itsJobID)); + } + } else { + itsIONstreams.push_back(new MultiplexedStream(*allIONstreamMultiplexers[0], itsJobID)); + } +} + + +void Job::barrier() +{ + char byte = 0; + + if (myPsetNumber == 0) { + for (unsigned i = 0; i < itsIONstreams.size(); i ++) { + itsIONstreams[i]->read(&byte, sizeof byte); + itsIONstreams[i]->write(&byte, sizeof byte); + } + } else { + itsIONstreams[0]->write(&byte, sizeof byte); + itsIONstreams[0]->read(&byte, sizeof byte); + } +} + + +// returns true iff all psets supply true +bool Job::agree(bool iAgree) +{ + bool allAgree = iAgree; // pset 0 needs to start with its own decision, for other psets this value is ignored + + if (myPsetNumber == 0) + for (unsigned i = 0; i < itsIONstreams.size(); i ++) { + bool youAgree; + itsIONstreams[i]->read(&youAgree, sizeof youAgree); + + allAgree = allAgree && youAgree; + } + else + itsIONstreams[0]->write(&iAgree, sizeof iAgree); + + broadcast(allAgree); + + return allAgree; +} + + +template <typename T> void Job::broadcast(T &value) +{ + if (myPsetNumber == 0) + for (unsigned i = 0; i < itsIONstreams.size(); i ++) + itsIONstreams[i]->write(&value, sizeof value); + else + itsIONstreams[0]->read(&value, sizeof value); +} + + +static void exitwitherror( const char *errorstr ) +{ + // can't cast to (void) since gcc won't allow that as a method to drop the result + int ignoreResult; + + ignoreResult = write(STDERR_FILENO, errorstr, strlen(errorstr)+1); + + // use _exit instead of exit to avoid calling atexit handlers in both + // the master and the child process. 
+ _exit(1); +} + +void Job::execSSH(const char *sshKey, const char *userName, const char *hostName, const char *executable, const char *rank, const char *parset, const char *cwd, const char *isBigEndian) +{ + // DO NOT DO ANY CALL THAT GRABS A LOCK, since the lock may be held by a + // thread that is no longer part of our address space + + // use write() for output since the Logger uses a mutex, and printf also holds locks + + // Prevent cancellation due to race conditions. A cancellation can still be pending for this JobThread, in which case one of the system calls + // below triggers it. If this thread/process can be cancelled, there will be multiple processes running, leading to all kinds of Bad Things. + Cancellation::disable(); + + // close all file descriptors other than stdin/out/err, which might have been openend by + // other threads at the time of fork(). We brute force over all possible fds, most of which will be invalid. + for (int f = sysconf(_SC_OPEN_MAX); f > 2; --f) + (void)close(f); + + // create a valid stdin from which can be read (a blocking fd created by pipe() won't suffice anymore for since at least OpenSSH 5.8) + // rationale: this forked process inherits stdin from the parent process, which is unusable because IONProc is started in the background + // and routed through mpirun as well. Also, it is shared by all forked processes. Nevertheless, we want Storage to be able to determine + // when to shut down based on whether stdin is open. So we create a new stdin. 
+ int devzero = open("/dev/zero", O_RDONLY); + + if (devzero < 0) + exitwitherror("cannot open /dev/zero\n"); + + if (close(0) < 0) + exitwitherror("cannot close stdin\n"); + + if (dup(devzero) < 0) + exitwitherror("cannot dup /dev/zero into stdin\n"); + + if (close(devzero) < 0) + exitwitherror("cannot close /dev/zero\n"); + + if (execl("/usr/bin/ssh", + "ssh", + "-q", + "-i", sshKey, + "-c", "blowfish", + "-o", "StrictHostKeyChecking=no", + "-o", "UserKnownHostsFile=/dev/null", + "-o", "ServerAliveInterval=30", + "-l", userName, + hostName, + + "cd", cwd, "&&", +#if defined USE_VALGRIND + "valgrind", "--leak-check=full", +#endif + executable, rank, parset, isBigEndian, + + static_cast<char *>(0) + ) < 0) + exitwitherror("execl failed\n"); + + exitwitherror("execl succeeded but did return\n"); +} + + +void Job::forkSSH(const char *sshKey, const char *userName, const char *hostName, const char *executable, const char *rank, const char *parset, const char *cwd, const char *isBigEndian, int &storagePID) +{ + LOG_INFO_STR("Storage writer on " << hostName << ": starting as rank " << rank); + LOG_DEBUG_STR("child will exec " + "\"/usr/bin/ssh " + "-q " + "-i " << sshKey << " " + "-c blowfish " + "-o StrictHostKeyChecking=no " + "-o UserKnownHostsFile=/dev/null " + "-o ServerAliveInterval=30 " + "-l " << userName << " " + << hostName << " " + "cd " << cwd << " && " +#if defined USE_VALGRIND + "valgrind " "--leak-check=full " +#endif + << executable << " " << rank << " " << parset << " " << isBigEndian << " " + "\"" + ); + + switch (storagePID = fork()) { + case -1 : throw SystemCallException("fork", errno, THROW_ARGS); + + case 0 : execSSH(sshKey, userName, hostName, executable, rank, parset, cwd, isBigEndian); + } +} + + +void Job::joinSSH(int childPID, const std::string &hostName, unsigned &timeout) +{ + if (childPID != 0) { + int status; + + // always try at least one waitpid(). if child has not exited, optionally + // sleep and try again. 
+ for (;;) { + pid_t ret; + + if ((ret = waitpid(childPID, &status, WNOHANG)) == (pid_t)-1) { + int error = errno; + + if (error == EINTR) { + LOG_DEBUG_STR(itsLogPrefix << "Storage writer on " << hostName << " : waitpid() was interrupted -- retrying"); + continue; + } + + // error + LOG_WARN_STR(itsLogPrefix << "Storage writer on " << hostName << " : waitpid() failed with errno " << error); + return; + } else if (ret == 0) { + // child still running + if (timeout == 0) { + break; + } + + timeout--; + sleep(1); + } else { + // child exited + if (WIFSIGNALED(status) != 0) + LOG_WARN_STR(itsLogPrefix << "SSH to storage writer on " << hostName << " was killed by signal " << WTERMSIG(status)); + else if (WEXITSTATUS(status) != 0) { + const char *explanation; + + switch (WEXITSTATUS(status)) { + default: + explanation = "??"; + break; + + case 255: + explanation = "Network or authentication error"; + break; + case 127: + explanation = "BASH: command/library not found"; + break; + case 126: + explanation = "BASH: command found but could not be executed (wrong architecture?)"; + break; + + case 128 + SIGHUP: + explanation = "killed by SIGHUP"; + break; + case 128 + SIGINT: + explanation = "killed by SIGINT (Ctrl-C)"; + break; + case 128 + SIGQUIT: + explanation = "killed by SIGQUIT"; + break; + case 128 + SIGILL: + explanation = "illegal instruction"; + break; + case 128 + SIGABRT: + explanation = "killed by SIGABRT"; + break; + case 128 + SIGKILL: + explanation = "killed by SIGKILL"; + break; + case 128 + SIGSEGV: + explanation = "segmentation fault"; + break; + case 128 + SIGPIPE: + explanation = "broken pipe"; + break; + case 128 + SIGALRM: + explanation = "killed by SIGALRM"; + break; + case 128 + SIGTERM: + explanation = "killed by SIGTERM"; + break; + } + + LOG_ERROR_STR(itsLogPrefix << "Storage writer on " << hostName << " exited with exit code " << WEXITSTATUS(status) << " (" << explanation << ")" ); + } else + LOG_INFO_STR(itsLogPrefix << "Storage writer on " << 
hostName << " terminated normally"); + + return; + } + } + + // child did not exit within the given timeout + + LOG_WARN_STR(itsLogPrefix << "Storage writer on " << hostName << " : sending SIGTERM"); + kill(childPID, SIGTERM); + + if (waitpid(childPID, &status, 0) == -1) { + LOG_WARN_STR(itsLogPrefix << "Storage writer on " << hostName << " : waitpid() failed"); + } + + LOG_WARN_STR(itsLogPrefix << "Storage writer on " << hostName << " terminated after sending SIGTERM"); + } +} + + +void Job::startStorageProcesses() +{ + itsStorageHostNames = itsParset.getStringVector("OLAP.Storage.hosts"); + + std::string userName = itsParset.getString("OLAP.Storage.userName"); + std::string sshKey = itsParset.getString("OLAP.Storage.sshIdentityFile"); + std::string executable = itsParset.getString("OLAP.Storage.msWriter"); + std::string parset = itsParset.name(); + + char cwd[1024]; + + if (getcwd(cwd, sizeof cwd) == 0) { + throw SystemCallException("getcwd", errno, THROW_ARGS); + } + + itsStoragePIDs.resize(itsStorageHostNames.size()); + + for (unsigned rank = 0; rank < itsStorageHostNames.size(); rank ++) + forkSSH(sshKey.c_str(), + userName.c_str(), + itsStorageHostNames[rank].c_str(), + executable.c_str(), + boost::lexical_cast<std::string>(rank).c_str(), + parset.c_str(), + cwd, +#if defined WORDS_BIGENDIAN + "1", +#else + "0", +#endif + itsStoragePIDs[rank]); +} + + +void Job::stopStorageProcesses() +{ + // warning: there could be zero storage processes + unsigned timeleft = 10; + + for (unsigned rank = 0; rank < itsStoragePIDs.size(); rank ++) + joinSSH(itsStoragePIDs[rank], itsStorageHostNames[rank], timeleft); +} + + +void Job::waitUntilCloseToStartOfObservation(time_t secondsPriorToStart) +{ + time_t closeToStart = static_cast<time_t>(itsParset.startTime()) - secondsPriorToStart; + char buf[26]; + + ctime_r(&closeToStart, buf); + buf[24] = '\0'; + + LOG_INFO_STR(itsLogPrefix << "Waiting for job to start: sleeping until " << buf); + + 
itsWallClockTime.waitUntil(closeToStart); +} + + +void Job::cancel() +{ + // note that JobQueue holds lock, so that this function executes atomically + + if (itsDoCancel) { + LOG_WARN_STR(itsLogPrefix << "Observation already cancelled"); + } else { + itsDoCancel = true; + //jobQueue.itsReevaluate.broadcast(); + + if (itsParset.realTime()) + itsWallClockTime.cancelWait(); + } +} + + +void Job::claimResources() +{ + ScopedLock scopedLock(jobQueue.itsMutex); + + while (!itsDoCancel) { + bool conflict = false; + + for (std::vector<Job *>::iterator job = jobQueue.itsJobs.begin(); job != jobQueue.itsJobs.end(); job ++) { + std::stringstream error; + + if ((*job)->itsIsRunning && (*job)->itsParset.conflictingResources(itsParset, error)) { + conflict = true; + LOG_WARN_STR(itsLogPrefix << "Postponed due to resource conflict with job " << (*job)->itsObservationID << ": " << error.str()); + } + } + + if (!conflict) { + itsIsRunning = true; + return; + } + + jobQueue.itsReevaluate.wait(jobQueue.itsMutex); + } +} + + +void Job::jobThread() +{ + if (myPsetNumber == 0 || itsHasPhaseOne || itsHasPhaseTwo || itsHasPhaseThree) { + createCNstreams(); + createIONstreams(); + + if (myPsetNumber == 0) { + // PLC: DEFINE phase + bool canStart = true; + + if (!checkParset()) { + canStart = false; + } + + if (!itsPLCClient) { + // we are either not PLC controlled, or we're supposed to be but can't connect to + // the ApplController + LOG_INFO_STR(itsLogPrefix << "Not controlled by ApplController"); + + // perform some functions which ApplController would have us do + + // obey the stop time in the parset -- the first anotherRun() will broadcast it + if (!pause(itsParset.stopTime())) { + LOG_ERROR_STR(itsLogPrefix << "Could not set observation stop time"); + canStart = false; + } + } + + if (canStart) { + // PLC: INIT phase + if (itsParset.realTime()) + waitUntilCloseToStartOfObservation(10); + + // PLC: in practice, RUN must start here, because resources + // can become available just 
before the observation starts. + // This means we will miss the beginning of the observation + // for now, because we need to calculate the delays still, + // which can only be done if we know the start time. + // That means we forgo full PLC control for now and ignore + // the init/run commands. In practice, the define command + // won't be useful either since we'll likely disconnect + // due to an invalid parset before PLC can ask. + + claimResources(); + + // we could start Storage before claiming resources + if (itsIsRunning && itsParset.hasStorage()) + startStorageProcesses(); + } + } + + broadcast(itsIsRunning); + + if (itsIsRunning) { + // PLC: RUN phase + + // each node is expected to: + // 1. agree() on starting, to allow the compute nodes to complain in preprocess() + // 2. call anotherRun() until the end of the observation to synchronise the + // stop time. + + if (itsHasPhaseOne || itsHasPhaseTwo || itsHasPhaseThree) { + switch (itsParset.nrBitsPerSample()) { + case 4 : doObservation<i4complex>(); + break; + + case 8 : doObservation<i8complex>(); + break; + + case 16 : doObservation<i16complex>(); + break; + } + } else { + if (agree(true)) { // we always agree on the fact that we can start + // force pset 0 to broadcast itsIsRunning periodically + while (anotherRun()) + ; + } + } + + // PLC: PAUSE phase + barrier(); + + // PLC: RELEASE phase + + // all InputSections and OutputSections have finished their processing, so + // Storage should be done any second now. 
+ + stopStorageProcesses(); + } + } + + finishedJobs.append(this); +} + + +void Job::createCNstreams() +{ + std::vector<unsigned> usedCoresInPset = itsParset.usedCoresInPset(); + + itsCNstreams.resize(usedCoresInPset.size()); + + for (unsigned core = 0; core < usedCoresInPset.size(); core ++) + itsCNstreams[core] = allCNstreams[usedCoresInPset[core]]; + + if (itsHasPhaseOne || itsHasPhaseTwo) { + std::vector<unsigned> phaseOneTwoCores = itsParset.phaseOneTwoCores(); + + itsPhaseOneTwoCNstreams.resize(phaseOneTwoCores.size()); + + for (unsigned core = 0; core < phaseOneTwoCores.size(); core ++) + itsPhaseOneTwoCNstreams[core] = allCNstreams[phaseOneTwoCores[core]]; + } + + if (itsHasPhaseThree) { + std::vector<unsigned> phaseThreeCores = itsParset.phaseThreeCores(); + + itsPhaseThreeCNstreams.resize(phaseThreeCores.size()); + + for (unsigned core = 0; core < phaseThreeCores.size(); core ++) + itsPhaseThreeCNstreams[core] = allCNstreams[phaseThreeCores[core]]; + } +} + + +template <typename SAMPLE_TYPE> void Job::attachToInputSection() +{ + ScopedLock scopedLock(theInputSectionMutex); + + if (theInputSectionRefCount == 0) { + theInputSection = new InputSection<SAMPLE_TYPE>(itsParset, myPsetNumber); + ++ theInputSectionRefCount; + } +} + + +template <typename SAMPLE_TYPE> void Job::detachFromInputSection() +{ + ScopedLock scopedLock(theInputSectionMutex); + + if (-- theInputSectionRefCount == 0) + delete static_cast<InputSection<SAMPLE_TYPE> *>(theInputSection); +} + + +bool Job::configureCNs() +{ + bool success = true; + + CN_Command command(CN_Command::PREPROCESS); + + LOG_DEBUG_STR(itsLogPrefix << "Configuring cores " << itsParset.usedCoresInPset() << " ..."); + + for (unsigned core = 0; core < itsCNstreams.size(); core ++) { + command.write(itsCNstreams[core]); + itsParset.write(itsCNstreams[core]); + } + +#if 0 // FIXME: leads to deadlock when using TCP + for (unsigned core = 0; core < itsCNstreams.size(); core ++) { + char failed; + 
itsCNstreams[core]->read(&failed, sizeof failed); + + if (failed) { + LOG_ERROR_STR(itsLogPrefix << "Core " << core << " failed to initialise"); + success = false; + } + } +#endif + + LOG_DEBUG_STR(itsLogPrefix << "Configuring cores " << itsParset.usedCoresInPset() << " done"); + + return success; +} + + +void Job::unconfigureCNs() +{ + CN_Command command(CN_Command::POSTPROCESS); + + LOG_DEBUG_STR(itsLogPrefix << "Unconfiguring cores " << itsParset.usedCoresInPset() << " ..."); + + for (unsigned core = 0; core < itsCNstreams.size(); core ++) + command.write(itsCNstreams[core]); + + LOG_DEBUG_STR(itsLogPrefix << "Unconfiguring cores " << itsParset.usedCoresInPset() << " done"); +} + + +bool Job::anotherRun() +{ + if (-- itsNrBlockTokens == 0) { + itsNrBlockTokens = itsNrBlockTokensPerBroadcast; + + // only consider cancelling at itsNrBlockTokensPerBroadcast boundaries + itsIsRunning = !itsDoCancel; + + // only allow pset 0 to actually decide whether or not to stop + broadcast(itsIsRunning); + + // sync updated stop times -- abuse atomicity of copying itsRequestedStopTime + itsStopTime = itsRequestedStopTime; + broadcast(itsStopTime); + } + + bool done = !itsIsRunning; + + if (itsStopTime > 0.0) { + // start time of last processed block + double currentTime = itsParset.startTime() + itsBlockNumber * itsParset.CNintegrationTime(); + + done = done || currentTime >= itsStopTime; + } + + itsBlockNumber ++; + + return !done; +} + + +template <typename SAMPLE_TYPE> void Job::doObservation() +{ + std::vector<OutputSection *> outputSections; + + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation start"); + + // first: send configuration to compute nodes so they know what to expect +#if defined CLUSTER_SCHEDULING + if (myPsetNumber == 0) + configureCNs(); +#else + if (!agree(configureCNs())) { + unconfigureCNs(); + + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); + + return; + } +#endif + + if (itsHasPhaseOne) + 
attachToInputSection<SAMPLE_TYPE>(); + + if (itsHasPhaseTwo) { + if (itsParset.outputFilteredData()) + outputSections.push_back(new FilteredDataOutputSection(itsParset, createCNstream)); + + if (itsParset.outputCorrelatedData()) + outputSections.push_back(new CorrelatedDataOutputSection(itsParset, createCNstream)); + + if (itsParset.outputIncoherentStokes()) + outputSections.push_back(new IncoherentStokesOutputSection(itsParset, createCNstream)); + } + + if (itsHasPhaseThree) { + if (itsParset.outputBeamFormedData()) + outputSections.push_back(new BeamFormedDataOutputSection(itsParset, createCNstream)); + + if (itsParset.outputCoherentStokes()) + outputSections.push_back(new CoherentStokesOutputSection(itsParset, createCNstream)); + + if (itsParset.outputTrigger()) + outputSections.push_back(new TriggerDataOutputSection(itsParset, createCNstream)); + } + + LOG_DEBUG_STR(itsLogPrefix << "doObservation processing input start"); + + { // separate scope to ensure that the beamletbuffertocomputenode objects + // only exist if the beamletbuffers exist in the inputsection + std::vector<SmartPtr<BeamletBuffer<SAMPLE_TYPE> > > noInputs; + BeamletBufferToComputeNode<SAMPLE_TYPE> beamletBufferToComputeNode(itsParset, itsPhaseOneTwoCNstreams, itsHasPhaseOne ? 
static_cast<InputSection<SAMPLE_TYPE> *>(theInputSection)->itsBeamletBuffers : noInputs, myPsetNumber); + + ControlPhase3Cores controlPhase3Cores(itsParset, itsPhaseThreeCNstreams); + + while (anotherRun()) { + for (unsigned i = 0; i < outputSections.size(); i ++) + outputSections[i]->addIterations(1); + + controlPhase3Cores.addIterations(1); + + beamletBufferToComputeNode.process(); + } + + LOG_DEBUG_STR(itsLogPrefix << "doObservation processing input done"); + } + + for (unsigned i = 0; i < outputSections.size(); i ++) + outputSections[i]->noMoreIterations(); + + for (unsigned i = 0; i < outputSections.size(); i ++) + delete outputSections[i]; + + if (itsHasPhaseOne) + detachFromInputSection<SAMPLE_TYPE>(); + +#if defined CLUSTER_SCHEDULING + if (myPsetNumber == 0) +#endif + unconfigureCNs(); + + if (LOG_CONDITION) + LOG_INFO_STR(itsLogPrefix << "----- Observation finished"); +} + + +bool Job::checkParset() const +{ + // any error detected by the python environment, invalidating this parset + string pythonParsetError = itsParset.getString("OLAP.IONProc.parsetError",""); + + if (pythonParsetError != "" ) { + LOG_ERROR_STR(itsLogPrefix << "Early detected parset error: " << pythonParsetError ); + return false; + } + + try { + itsParset.check(); + } catch( InterfaceException &ex ) { + LOG_ERROR_STR(itsLogPrefix << "Parset check failed on " << ex.what() ); + return false; + } + + if (itsParset.nrCoresPerPset() > nrCNcoresInPset) { + LOG_ERROR_STR(itsLogPrefix << "nrCoresPerPset (" << itsParset.nrCoresPerPset() << ") cannot exceed " << nrCNcoresInPset); + return false; + } + + return true; +} + + +void Job::printInfo() const +{ + LOG_INFO_STR(itsLogPrefix << "JobID = " << itsJobID << ", " << (itsIsRunning ? 
"running" : "not running")); +} + + +// expected sequence: define -> init -> run -> pause -> release -> quit + +bool Job::define() +{ + LOG_DEBUG_STR(itsLogPrefix << "Job: define(): check parset"); + + return checkParset(); +} + + +bool Job::init() +{ + LOG_DEBUG_STR(itsLogPrefix << "Job: init(): allocate buffers / make connections"); + + return true; +} + + +bool Job::run() +{ + LOG_DEBUG_STR(itsLogPrefix << "Job: run(): run observation"); + + // we ignore this, since 'now' is both ill-defined and we need time + // to communicate changes to other psets + + return true; +} + + +bool Job::pause(const double &when) +{ + char buf[26]; + time_t whenRounded = static_cast<time_t>(when); + + ctime_r(&whenRounded, buf); + buf[24] = '\0'; + + LOG_DEBUG_STR(itsLogPrefix << "Job: pause(): pause observation at " << buf); + + // make sure we don't interfere with queue dynamics + ScopedLock scopedLock(jobQueue.itsMutex); + + if (itsParset.realTime() && (when == 0 || when <= itsParset.startTime())) { // yes we can compare a double to 0 + // make sure we also stop waiting for the job to start + + if (!itsDoCancel) + cancel(); + } else { + itsRequestedStopTime = when; + } + + return true; +} + + +bool Job::quit() +{ + LOG_DEBUG_STR(itsLogPrefix << "Job: quit(): end observation"); + // stop now + + if (!itsDoCancel) { + ScopedLock scopedLock(jobQueue.itsMutex); + + cancel(); + } + + return true; +} + + +bool Job::observationRunning() +{ + LOG_DEBUG_STR(itsLogPrefix << "Job: observationRunning()"); + return itsIsRunning; +} + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/Job.h b/RTCP/GPUProc/src/Job.h new file mode 100644 index 0000000000000000000000000000000000000000..634c3cbe3b9e05b61658c28c2f1e0d17f5541461 --- /dev/null +++ b/RTCP/GPUProc/src/Job.h @@ -0,0 +1,133 @@ +//# Job.h +//# +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the 
GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: ION_main.cc 15296 2010-03-24 10:19:41Z romein $ + + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +#if !defined LOFAR_RTCP_JOB_H +#define LOFAR_RTCP_JOB_H + +#include <InputSection.h> +#include <Interface/Parset.h> +#include <Interface/SmartPtr.h> +#include <JobQueue.h> +#include <Stream/Stream.h> +#include <WallClockTime.h> +#include <Common/Thread/Mutex.h> +#include <Common/Thread/Queue.h> +#include <Common/Thread/Thread.h> +#include <PLCClient.h> + +#include <sys/time.h> + +#include <vector> +#include <string> + + +namespace LOFAR { +namespace RTCP { + + +class Job : public PLCRunnable +{ + public: + Job(const char *parsetName); + ~Job(); + + void cancel(); + void printInfo() const; + + const Parset itsParset; + const unsigned itsJobID, itsObservationID; + + // implement PLCRunnable + virtual bool define(); + virtual bool init(); + virtual bool run(); + virtual bool pause(const double &when); + virtual bool quit(); + virtual bool observationRunning(); + + private: + bool checkParset() const; + void createCNstreams(); + bool configureCNs(); + void unconfigureCNs(); + + void createIONstreams(); + void barrier(); + bool agree(bool iAgree); + template <typename T> void broadcast(T &); + + void claimResources(); + + bool anotherRun(); + + void jobThread(); + template <typename SAMPLE_TYPE> void 
doObservation(); + + template <typename SAMPLE_TYPE> void attachToInputSection(); + template <typename SAMPLE_TYPE> void detachFromInputSection(); + + static void execSSH(const char *sshKey, const char *userName, const char *hostName, const char *executable, const char *rank, const char *parset, const char *cwd, const char *isBigEndian); + static void forkSSH(const char *sshKey, const char *userName, const char *hostName, const char *executable, const char *rank, const char *parset, const char *cwd, const char *isBigEndian, int &storagePID); + void joinSSH(int childPID, const std::string &hostName, unsigned &timeout); + + void startStorageProcesses(); + void stopStorageProcesses(); + + void waitUntilCloseToStartOfObservation(time_t secondsPriorToStart); + + SmartPtr<Stream> itsPLCStream; + SmartPtr<PLCClient> itsPLCClient; + + std::string itsLogPrefix; + + std::vector<std::string> itsStorageHostNames; + std::vector<int> itsStoragePIDs; + + std::vector<Stream *> itsCNstreams, itsPhaseOneTwoCNstreams, itsPhaseThreeCNstreams; + std::vector<SmartPtr<Stream> > itsIONstreams; + bool itsHasPhaseOne, itsHasPhaseTwo, itsHasPhaseThree; + bool itsIsRunning, itsDoCancel; + + unsigned itsBlockNumber; + double itsRequestedStopTime, itsStopTime; + unsigned itsNrBlockTokens, itsNrBlockTokensPerBroadcast; + + static unsigned nextJobID; + + WallClockTime itsWallClockTime; + + static void *theInputSection; + static Mutex theInputSectionMutex; + static unsigned theInputSectionRefCount; + + SmartPtr<Thread> itsJobThread; +}; + + +extern Queue<Job *> finishedJobs; + + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/LockedRanges.h b/RTCP/GPUProc/src/LockedRanges.h new file mode 100644 index 0000000000000000000000000000000000000000..6e1ac8d2492f3e85d6a445af4f9deca5a0e93f1a --- /dev/null +++ b/RTCP/GPUProc/src/LockedRanges.h @@ -0,0 +1,95 @@ +//# Copyright (C) 2007 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA 
Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: LockedRanges.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_LOCKED_RANGES_H +#define LOFAR_GPUPROC_LOCKED_RANGES_H + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! 
+ +#include <Common/LofarLogger.h> +#include <Interface/SparseSet.h> +#include <Common/Thread/Condition.h> +#include <Common/Thread/Mutex.h> + + +namespace LOFAR { +namespace RTCP { + +class LockedRanges +{ + public: + LockedRanges(unsigned bufferSize); + + void lock(unsigned begin, unsigned end); + void unlock(unsigned begin, unsigned end); + + private: + SparseSet<unsigned> itsLockedRanges; + Mutex itsMutex; + Condition itsRangeUnlocked; + const unsigned itsBufferSize; +}; + + +inline LockedRanges::LockedRanges(unsigned bufferSize) +: + itsBufferSize(bufferSize) +{ +} + + +inline void LockedRanges::lock(unsigned begin, unsigned end) +{ + ScopedLock scopedLock(itsMutex); + + if (begin < end) { + while (itsLockedRanges.subset(begin, end).count() > 0) { + LOG_WARN_STR("Circular buffer: reader & writer try to use overlapping sections, range to lock = (" << begin << ", " << end << "), already locked = " << itsLockedRanges); + itsRangeUnlocked.wait(itsMutex); + } + + itsLockedRanges.include(begin, end); + } else { + while (itsLockedRanges.subset(begin, itsBufferSize).count() > 0 || itsLockedRanges.subset(0, end).count() > 0) { + LOG_WARN_STR("Circular buffer: reader & writer try to use overlapping sections, range to lock = (" << begin << ", " << end << "), already locked = " << itsLockedRanges); + itsRangeUnlocked.wait(itsMutex); + } + + itsLockedRanges.include(begin, itsBufferSize).include(0, end); + } +} + + +inline void LockedRanges::unlock(unsigned begin, unsigned end) +{ + ScopedLock scopedLock(itsMutex); + + if (begin < end) + itsLockedRanges.exclude(begin, end); + else + itsLockedRanges.exclude(end, itsBufferSize).exclude(0, begin); + + itsRangeUnlocked.broadcast(); +} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/LogThread.cc b/RTCP/GPUProc/src/LogThread.cc new file mode 100644 index 0000000000000000000000000000000000000000..388186b0b1bc1389c0760b23c84cdbf66c02bfcf --- /dev/null +++ b/RTCP/GPUProc/src/LogThread.cc @@ -0,0 
+1,183 @@ +//# LogThread.cc: +//# +//# Copyright (C) 2008 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: LogThread.cc 17975 2011-05-10 09:52:51Z mol $ + +//# Always #include <lofar_config.h> first! 
+#include <lofar_config.h> + +#include <LogThread.h> +#include <Scheduling.h> +#include <Interface/PrintVector.h> +#include <Common/LofarLogger.h> +#include <Common/Thread/Cancellation.h> + +#include <algorithm> +#include <cstdio> +#include <sstream> + +#include <unistd.h> + + +namespace LOFAR { +namespace RTCP { + + +// log from separate thread, since printing from a signal handler causes deadlocks + +LogThread::LogThread(unsigned nrRspBoards, std::string stationName) +: + itsCounters(nrRspBoards), + itsStationName(stationName), + itsThread(this, &LogThread::mainLoop, "[LogThread] ", 65536) +{ +} + + +LogThread::~LogThread() +{ + itsThread.cancel(); + LOG_DEBUG_STR("[LogThread] finished"); +} + + +#if defined HAVE_BGP_ION + +bool LogThread::readCPUstats(struct CPUload &load) +{ + FILE *file = fopen("/proc/stat", "r"); + int retval; + + if (file == 0) + return false; + + // make sure the file is always closed -- even on cancellation (fscanf CAN be a cancellation point) + struct D { + ~D() { + fclose(file); + } + + FILE *file; + } onDestruct = { file }; + (void)onDestruct; + + do + retval = fscanf(file, "cpu %llu %*u %llu %llu %*u %*u %llu %*u\n", &load.user, &load.system, &load.idle, &load.interrupt); + while (retval != 4 && retval != EOF); + + do + retval = fscanf(file, "cpu0 %*u %*u %*u %llu %*u %*u %*u %*u\n", &load.idle0); + while (retval != 1 && retval != EOF); + + return retval != EOF; +} + + +void LogThread::writeCPUstats(std::stringstream &str) +{ + struct CPUload load; + struct timeval tv; + + if (readCPUstats(load)) { + gettimeofday( &tv, 0 ); + + float timediff = (tv.tv_sec - previousTimeval.tv_sec) + (tv.tv_usec - previousTimeval.tv_usec)/1.0e6; + + //str << ", us/sy/in/id: [" + str << ", us/sy/in/id(0): [" + << fixed << setprecision(0) + << (unsigned(load.user - previousLoad.user) + 2) / 4 / timediff << '/' + << (unsigned(load.system - previousLoad.system) + 2) / 4 / timediff << '/' + << (unsigned(load.interrupt - previousLoad.interrupt) + 2) / 4 / 
timediff << '/' + << (unsigned(load.idle - previousLoad.idle) + 2) / 4 / timediff << '(' + << (unsigned(load.idle0 - previousLoad.idle0) / timediff) << ")]"; +#if 0 + << "], id: [" + << (unsigned(load.idlePerCore[0] - previousLoad.idlePerCore[0]) << '/' + + for (unsigned cpu = 0; cpu < 4; cpu ++) + str << unsigned(load.idle[cpu] - previousLoad.idle[cpu]) + << (cpu == 3 ? ']' : ','); +#endif + + previousLoad = load; + previousTimeval = tv; + } else { + str << ", no CPU load info"; + } +} + +#endif + + +void LogThread::mainLoop() +{ +#if defined HAVE_BGP_ION + runOnCore0(); + readCPUstats(previousLoad); + gettimeofday(&previousTimeval,0); +#endif + + //LOG_DEBUG("LogThread running"); + + // non-atomic updates from other threads cause race conditions, but who cares + + while (true) { + std::stringstream logStr; + std::vector<unsigned> counts(itsCounters.size()); + + for (unsigned rsp = 0; rsp < itsCounters.size(); rsp ++) { + counts[rsp] = itsCounters[rsp].received; + itsCounters[rsp].received = 0; + } + + logStr << "[station " << itsStationName << "] "; + + logStr << "received packets = " << counts; + + for (unsigned rsp = 0; rsp < itsCounters.size(); rsp ++) { + counts[rsp] = itsCounters[rsp].badSize; + itsCounters[rsp].badSize = 0; + } + + if (static_cast<unsigned>(std::count(counts.begin(), counts.end(), 0U)) != counts.size()) + logStr << ", bad size = " << counts; + + for (unsigned rsp = 0; rsp < itsCounters.size(); rsp ++) { + counts[rsp] = itsCounters[rsp].badTimeStamp; + itsCounters[rsp].badTimeStamp = 0; + } + + if (static_cast<unsigned>(std::count(counts.begin(), counts.end(), 0U)) != counts.size()) + logStr << ", bad timestamps = " << counts; + +#if defined HAVE_BGP_ION + writeCPUstats(logStr); +#endif + + LOG_INFO_STR(logStr.str()); + sleep(15); + } + + //LOG_DEBUG("LogThread stopped"); +} + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/LogThread.h b/RTCP/GPUProc/src/LogThread.h new file mode 100644 index 
0000000000000000000000000000000000000000..2da47b09f09ed422d734234926094802ce93190a --- /dev/null +++ b/RTCP/GPUProc/src/LogThread.h @@ -0,0 +1,77 @@ +//# LogThread.h: log from separate thread, since printing from a signal +//# handler causes deadlocks +//# +//# Copyright (C) 2006 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: LogThread.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_LOG_THREAD_H +#define LOFAR_GPUPROC_LOG_THREAD_H + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! 
+ + +#include <Common/Thread/Thread.h> + +#include <vector> +#include <string> +#include <sys/time.h> + +namespace LOFAR { +namespace RTCP { + +class LogThread +{ + public: + LogThread(unsigned nrRspBoards, std::string stationName); + ~LogThread(); + + struct Counters { + unsigned received, badTimeStamp, badSize; + unsigned pad[5]; // pad to cache line size to avoid false sharing + }; + + std::vector<Counters> itsCounters; + + private: + void mainLoop(); + + std::string itsStationName; + + Thread itsThread; + +#if defined HAVE_BGP_ION + struct CPUload { + //unsigned long long user, system, interrupt, idle, idlePerCore[4]; + unsigned long long user, system, interrupt, idle, idle0; + } previousLoad; + + struct timeval previousTimeval; + + bool readCPUstats(struct CPUload &load); + void writeCPUstats(std::stringstream &str); +#endif + }; + + // @} + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/NewCorrelator.cl b/RTCP/GPUProc/src/NewCorrelator.cl new file mode 100644 index 0000000000000000000000000000000000000000..307c70f347743ea6201b6fd2a87fad59ad62812d --- /dev/null +++ b/RTCP/GPUProc/src/NewCorrelator.cl @@ -0,0 +1,151 @@ +#define NR_STATIONS_PER_BLOCK 32 +#define BLOCK_SIZE 8 + +#define NR_BASELINES (NR_STATIONS * (NR_STATIONS + 1) / 2) + + +typedef __global float (*CorrectedDataType)[NR_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_POLARIZATIONS * 2]; +typedef __global float8 (*VisibilitiesType)[NR_BASELINES][NR_CHANNELS]; + + +#if 0 +__kernel void correlateTriangles(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float4 samples[BLOCK_SIZE][NR_STATIONS_PER_BLOCK]; + + uint triangle = get_global_id(1); + uint channel = get_global_id(2); + uint firstStation = triangle * NR_STATIONS_PER_BLOCK; + + float4 vis_0A_r = (float4) 0, vis_0A_i = 
(float4) 0; + float4 vis_0B_r = (float4) 0, vis_0B_i = (float4) 0; + float4 vis_1A_r = (float4) 0, vis_1A_i = (float4) 0; + float4 vis_1B_r = (float4) 0, vis_1B_i = (float4) 0; + + for (uint major = 0; major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory +#pragma unroll 1 + for (uint i = get_local_id(0); i < BLOCK_SIZE * NR_STATIONS_PER_BLOCK; i += get_local_size(0)) { + uint time = i % BLOCK_SIZE; + uint stat = i / BLOCK_SIZE; + + if (firstStation + stat < NR_STATIONS) + samples[time][stat] = (*correctedData)[firstStation + stat][channel][major + time]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + // compute auto-correlations + + if (firstStation + get_local_id(0) < NR_STATIONS) { + for (time = 0; time + BLOCK_SIZE; time ++) { + float sample = samples[time][get_local_id(0)]; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + } +} +#endif + + +__kernel __attribute__((reqd_work_group_size(NR_STATIONS_PER_BLOCK * NR_STATIONS_PER_BLOCK / 4, 1, 1))) +void correlateRectangles(__global void *visibilitiesPtr, + __global const void *correctedDataPtr +) +{ + VisibilitiesType visibilities = (VisibilitiesType) visibilitiesPtr; + CorrectedDataType correctedData = (CorrectedDataType) correctedDataPtr; + + __local float4 samplesX[2][BLOCK_SIZE][NR_STATIONS_PER_BLOCK / 2 | 1]; + __local float4 samplesY[2][BLOCK_SIZE][NR_STATIONS_PER_BLOCK / 2 | 1]; + + uint block = get_global_id(1); + uint channel = get_global_id(2); + uint blockX = convert_uint_rtz(sqrt(convert_float(8 * block + 1)) - 0.99999f) / 2; + uint blockY = block - blockX * (blockX + 1) / 2; + +#if NR_STATIONS % NR_STATIONS_PER_BLOCK == 0 + uint firstStationX = (blockX + 1) * NR_STATIONS_PER_BLOCK; + uint firstStationY = blockY * NR_STATIONS_PER_BLOCK; +#else + uint firstStationX = blockX * NR_STATIONS_PER_BLOCK + NR_STATIONS % NR_STATIONS_PER_BLOCK; + int firstStationY = (blockY - 1) * NR_STATIONS_PER_BLOCK + NR_STATIONS % NR_STATIONS_PER_BLOCK; +#endif + + uint statXoffset = (get_local_id(0) / 
(NR_STATIONS_PER_BLOCK / 2)); + uint statYoffset = (get_local_id(0) % (NR_STATIONS_PER_BLOCK / 2)); + + float4 vis_0A_r = 0, vis_0A_i = 0; + float4 vis_0B_r = 0, vis_0B_i = 0; + float4 vis_1A_r = 0, vis_1A_i = 0; + float4 vis_1B_r = 0, vis_1B_i = 0; + + for (uint major = 0; major < NR_SAMPLES_PER_CHANNEL; major += BLOCK_SIZE) { + // load data into local memory + for (uint i = get_local_id(0); i < 4 * BLOCK_SIZE * NR_STATIONS_PER_BLOCK; i += NR_STATIONS_PER_BLOCK * NR_STATIONS_PER_BLOCK / 4) { + uint p = i % 4; + uint time = i / 4 % BLOCK_SIZE; + uint stat = i / 4 / BLOCK_SIZE; + + ((__local float *) &samplesX[stat % 2][time][stat / 2])[p] = (*correctedData)[firstStationX + stat][channel][major + time][p]; + + if (NR_STATIONS % NR_STATIONS_PER_BLOCK == 0 || (int) (firstStationY + stat) >= 0) + ((__local float *) &samplesY[stat % 2][time][stat / 2])[p] = (*correctedData)[firstStationY + stat][channel][major + time][p]; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + if (NR_STATIONS % NR_STATIONS_PER_BLOCK == 0 || (int) (firstStationY + 2 * statYoffset) >= -1) { + for (uint time = 0; time < BLOCK_SIZE; time ++) { + float4 sample_0 = samplesY[0][time][statYoffset]; + float4 sample_A = samplesX[0][time][statXoffset]; + float4 sample_B = samplesX[1][time][statXoffset]; + float4 sample_1 = samplesY[1][time][statYoffset]; + + vis_0A_r += sample_0.xxzz * sample_A.xzxz; + vis_0A_i += sample_0.yyww * sample_A.xzxz; + vis_0B_r += sample_0.xxzz * sample_B.xzxz; + vis_0B_i += sample_0.yyww * sample_B.xzxz; + vis_1A_r += sample_1.xxzz * sample_A.xzxz; + vis_1A_i += sample_1.yyww * sample_A.xzxz; + vis_1B_r += sample_1.xxzz * sample_B.xzxz; + vis_1B_i += sample_1.yyww * sample_B.xzxz; + + vis_0A_r += sample_0.yyww * sample_A.ywyw; + vis_0A_i -= sample_0.xxzz * sample_A.ywyw; + vis_0B_r += sample_0.yyww * sample_B.ywyw; + vis_0B_i -= sample_0.xxzz * sample_B.ywyw; + vis_1A_r += sample_1.yyww * sample_A.ywyw; + vis_1A_i -= sample_1.xxzz * sample_A.ywyw; + vis_1B_r += sample_1.yyww * 
sample_B.ywyw; + vis_1B_i -= sample_1.xxzz * sample_B.ywyw; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + int statY = firstStationY + 2 * statYoffset; + uint statX = firstStationX + 2 * statXoffset; + uint baseline = (statX * (statX + 1) / 2) + statY; + + if (NR_STATIONS % NR_STATIONS_PER_BLOCK == 0 || statY >= 0) { + (*visibilities)[baseline ][channel].even = vis_0A_r; + (*visibilities)[baseline ][channel].odd = vis_0A_i; + (*visibilities)[baseline + 1][channel].even = vis_1A_r; + (*visibilities)[baseline + 1][channel].odd = vis_1A_i; + } + + if (NR_STATIONS % NR_STATIONS_PER_BLOCK == 0 || statY >= -1) { + (*visibilities)[baseline + statX + 1][channel].even = vis_0B_r; + (*visibilities)[baseline + statX + 1][channel].odd = vis_0B_i; + (*visibilities)[baseline + statX + 2][channel].even = vis_1B_r; + (*visibilities)[baseline + statX + 2][channel].odd = vis_1B_i; + } +} diff --git a/RTCP/GPUProc/src/NewCorrelator.cl-0.ptx b/RTCP/GPUProc/src/NewCorrelator.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..c69f71f47ba5748d5cb342ecbed29c627f766f0f Binary files /dev/null and b/RTCP/GPUProc/src/NewCorrelator.cl-0.ptx differ diff --git a/RTCP/GPUProc/src/OpenCL_Support.cc b/RTCP/GPUProc/src/OpenCL_Support.cc new file mode 100644 index 0000000000000000000000000000000000000000..ef32c2c6dabbabd75b402600f9e19b834345a11b --- /dev/null +++ b/RTCP/GPUProc/src/OpenCL_Support.cc @@ -0,0 +1,186 @@ +#include "lofar_config.h" + +#include "OpenCL_Support.h" + +#include <cstdlib> +#include <cstring> +#include <fstream> +#include <iomanip> +#include <iostream> + +namespace LOFAR { +namespace RTCP { + +const char *errorMessage(cl_int error) +{ + switch (error) { + case CL_SUCCESS: return "Success!"; + case CL_DEVICE_NOT_FOUND: return "Device not found."; + case CL_DEVICE_NOT_AVAILABLE: return "Device not available"; + case CL_COMPILER_NOT_AVAILABLE: return "Compiler not available"; + case CL_MEM_OBJECT_ALLOCATION_FAILURE: return "Memory object allocation 
failure"; + case CL_OUT_OF_RESOURCES: return "Out of resources"; + case CL_OUT_OF_HOST_MEMORY: return "Out of host memory"; + case CL_PROFILING_INFO_NOT_AVAILABLE: return "Profiling information not available"; + case CL_MEM_COPY_OVERLAP: return "Memory copy overlap"; + case CL_IMAGE_FORMAT_MISMATCH: return "Image format mismatch"; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: return "Image format not supported"; + case CL_BUILD_PROGRAM_FAILURE: return "Program build failure"; + case CL_MAP_FAILURE: return "Map failure"; + case CL_INVALID_VALUE: return "Invalid value"; + case CL_INVALID_DEVICE_TYPE: return "Invalid device type"; + case CL_INVALID_PLATFORM: return "Invalid platform"; + case CL_INVALID_DEVICE: return "Invalid device"; + case CL_INVALID_CONTEXT: return "Invalid context"; + case CL_INVALID_QUEUE_PROPERTIES: return "Invalid queue properties"; + case CL_INVALID_COMMAND_QUEUE: return "Invalid command queue"; + case CL_INVALID_HOST_PTR: return "Invalid host pointer"; + case CL_INVALID_MEM_OBJECT: return "Invalid memory object"; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: return "Invalid image format descriptor"; + case CL_INVALID_IMAGE_SIZE: return "Invalid image size"; + case CL_INVALID_SAMPLER: return "Invalid sampler"; + case CL_INVALID_BINARY: return "Invalid binary"; + case CL_INVALID_BUILD_OPTIONS: return "Invalid build options"; + case CL_INVALID_PROGRAM: return "Invalid program"; + case CL_INVALID_PROGRAM_EXECUTABLE: return "Invalid program executable"; + case CL_INVALID_KERNEL_NAME: return "Invalid kernel name"; + case CL_INVALID_KERNEL_DEFINITION: return "Invalid kernel definition"; + case CL_INVALID_KERNEL: return "Invalid kernel"; + case CL_INVALID_ARG_INDEX: return "Invalid argument index"; + case CL_INVALID_ARG_VALUE: return "Invalid argument value"; + case CL_INVALID_ARG_SIZE: return "Invalid argument size"; + case CL_INVALID_KERNEL_ARGS: return "Invalid kernel arguments"; + case CL_INVALID_WORK_DIMENSION: return "Invalid work dimension"; + case 
CL_INVALID_WORK_GROUP_SIZE: return "Invalid work group size"; + case CL_INVALID_WORK_ITEM_SIZE: return "Invalid work item size"; + case CL_INVALID_GLOBAL_OFFSET: return "Invalid global offset"; + case CL_INVALID_EVENT_WAIT_LIST: return "Invalid event wait list"; + case CL_INVALID_EVENT: return "Invalid event"; + case CL_INVALID_OPERATION: return "Invalid operation"; + case CL_INVALID_GL_OBJECT: return "Invalid OpenGL object"; + case CL_INVALID_BUFFER_SIZE: return "Invalid buffer size"; + case CL_INVALID_MIP_LEVEL: return "Invalid mip-map level"; + default: return "Unknown"; + } +} + + +void createContext(cl::Context &context, std::vector<cl::Device> &devices) +{ + const char *platformName = getenv("PLATFORM"); + +#if defined __linux__ + if (platformName == 0) +#endif + platformName = "NVIDIA CUDA"; + //platformName = "AMD Accelerated Parallel Processing"; + + cl_device_type type = CL_DEVICE_TYPE_DEFAULT; + + const char *deviceType = getenv("TYPE"); + + if (deviceType != 0) { + if (strcmp(deviceType, "GPU") == 0) + type = CL_DEVICE_TYPE_GPU; + else if (strcmp(deviceType, "CPU") == 0) + type = CL_DEVICE_TYPE_CPU; + else + std::cerr << "warning: unrecognized device type" << std::endl; + } + + const char *deviceName = getenv("DEVICE"); + + std::vector<cl::Platform> platforms; + cl::Platform::get(&platforms); + + for (std::vector<cl::Platform>::iterator platform = platforms.begin(); platform != platforms.end(); platform ++) { + std::cout << "Platform profile: " << platform->getInfo<CL_PLATFORM_PROFILE>() << std::endl; + std::cout << "Platform name: " << platform->getInfo<CL_PLATFORM_NAME>() << std::endl; + std::cout << "Platform version: " << platform->getInfo<CL_PLATFORM_VERSION>() << std::endl; + std::cout << "Platform extensions: " << platform->getInfo<CL_PLATFORM_EXTENSIONS>() << std::endl; + } + + for (std::vector<cl::Platform>::iterator platform = platforms.begin(); platform != platforms.end(); platform ++) { + if (platform->getInfo<CL_PLATFORM_NAME>() == 
platformName) { + platform->getDevices(type, &devices); + + if (deviceName != 0) + for (std::vector<cl::Device>::iterator device = devices.end(); -- device >= devices.begin();) + if (device->getInfo<CL_DEVICE_NAME>() != deviceName) + devices.erase(device); + + for (std::vector<cl::Device>::iterator device = devices.begin(); device != devices.end(); device ++) { + std::cout << "device: " << device->getInfo<CL_DEVICE_NAME>() << std::endl; + std::cout << "max mem: " << device->getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() << std::endl; + } + + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(*platform)(), 0 }; + context = cl::Context(type, cps); + return; + } + } + + std::cerr << "Platform not found" << std::endl; + exit(1); +} + + +cl::Program createProgram(cl::Context &context, std::vector<cl::Device> &devices, const char *sources, const char *args) +{ + std::ifstream kernelStream(sources); + std::string kernelSource((std::istreambuf_iterator<char>(kernelStream)), std::istreambuf_iterator<char>()); + cl::Program::Sources source(1, std::make_pair(kernelSource.data(), kernelSource.size())); + cl::Program program(context, source); + + try { + program.build(devices, args); + std::string msg; + program.getBuildInfo(devices[0], CL_PROGRAM_BUILD_LOG, &msg); +#pragma omp critical (cout) + std::cout << msg; + } catch (cl::Error &error) { + if (strcmp(error.what(), "clBuildProgram") == 0) { + std::string msg; + program.getBuildInfo(devices[0], CL_PROGRAM_BUILD_LOG, &msg); +#pragma omp critical (cerr) + std::cerr << msg << std::endl; + exit(1); + } else { + throw; + } + } + +#if 1 + std::vector<size_t> binarySizes = program.getInfo<CL_PROGRAM_BINARY_SIZES>(); +#if 0 + // cl::Program::getInfo<> cl.hpp broken + std::vector<char *> binaries = program.getInfo<CL_PROGRAM_BINARIES>(); +#else + std::vector<char *> binaries(binarySizes.size()); + + for (unsigned b = 0; b < binaries.size(); b ++) + binaries[b] = new char[binarySizes[b]]; + + cl_int error = 
clGetProgramInfo(program(), CL_PROGRAM_BINARIES, binaries.size() * sizeof(char *), &binaries[0], 0); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clGetProgramInfo"); // FIXME: cleanup binaries[*] +#endif + + for (unsigned i = 0; i < 1 /*binaries.size()*/; i ++) { + std::stringstream filename; + filename << sources << '-' << i << ".ptx"; + std::ofstream(filename.str().c_str(), std::ofstream::binary).write(binaries[i], binarySizes[i]); + } + +#if 1 + for (unsigned b = 0; b < binaries.size(); b ++) + delete [] binaries[b]; +#endif +#endif + + return program; +} + +} // namespace RTCP +} // namespace LOFAR diff --git a/RTCP/GPUProc/src/OpenCL_Support.h b/RTCP/GPUProc/src/OpenCL_Support.h new file mode 100644 index 0000000000000000000000000000000000000000..51b33660316d4c0c035da2a2d78395aa9a49f0ef --- /dev/null +++ b/RTCP/GPUProc/src/OpenCL_Support.h @@ -0,0 +1,229 @@ +#if !defined OPEN_CL_SUPPORT_H +#define OPEN_CL_SUPPORT_H + +#define __CL_ENABLE_EXCEPTIONS +#include <CL/cl.hpp> + +#include <boost/multi_array.hpp> +#include <vector> + + +namespace LOFAR { +namespace RTCP { + +extern const char *errorMessage(cl_int error); +extern void createContext(cl::Context &, std::vector<cl::Device> &); +extern cl::Program createProgram(cl::Context &, std::vector<cl::Device> &, const char *sources, const char *args); + + +template <class T> class HostBufferAllocator +{ + public: + // type definitions + typedef T value_type; + typedef T *pointer; + typedef const T *const_pointer; + typedef T &reference; + typedef const T &const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + // rebind allocator to type U + template <class U> struct rebind { + typedef HostBufferAllocator<U> other; + }; + + // return address of values + pointer address(reference value) const + { + return &value; + } + + const_pointer address(const_reference value) const + { + return &value; + } + + // constructors and destructor + // - nothing to do because the 
allocator has no state + HostBufferAllocator(cl::CommandQueue &queue, cl_mem_flags flags = CL_MEM_READ_WRITE) throw() + : + queue(queue), + flags(flags) + { + } + + HostBufferAllocator(const HostBufferAllocator &other) throw() + : + queue(other.queue), + flags(other.flags) + { + } + + template <class U> HostBufferAllocator(const HostBufferAllocator<U> &other) throw() + : + queue(other.queue), + flags(other.flags) + { + } + + ~HostBufferAllocator() throw() + { + } + + // return maximum number of elements that can be allocated + size_type max_size() const throw() + { + return queue.getInfo<CL_QUEUE_DEVICE>().getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>() / sizeof(T); + } + + // allocate but don't initialize num elements of type T + pointer allocate(size_type num, const void * = 0) + { + buffer = cl::Buffer(queue.getInfo<CL_QUEUE_CONTEXT>(), flags | CL_MEM_ALLOC_HOST_PTR, num * sizeof(T)); + return static_cast<pointer>(queue.enqueueMapBuffer(buffer, CL_TRUE, flags & CL_MEM_READ_WRITE ? CL_MAP_READ | CL_MAP_WRITE : flags & CL_MEM_READ_ONLY ? CL_MAP_READ : flags & CL_MEM_WRITE_ONLY ? 
CL_MAP_WRITE : 0, 0, num * sizeof(T))); + } + + // deallocate storage p of deleted elements + void deallocate(pointer ptr, size_type /*num*/) + { + queue.enqueueUnmapMemObject(buffer, ptr); + } + + // initialize elements of allocated storage p with value value + void construct(pointer p, const T& value) + { + // initialize memory with placement new + new ((void *) p) T(value); + } + + // destroy elements of initialized storage p + void destroy(pointer p) + { + // destroy objects by calling their destructor + p->~T(); + } + + cl::CommandQueue queue; + cl_mem_flags flags; + cl::Buffer buffer; +}; + +// +// return that all specializations of this allocator are interchangeable +template <class T1, class T2> bool operator == (const HostBufferAllocator<T1> &, const HostBufferAllocator<T2> &) throw() +{ + return true; +} + + +template <class T1, class T2> bool operator != (const HostBufferAllocator<T1> &, const HostBufferAllocator<T2> &) throw() +{ + return false; +} + + +template <typename T, std::size_t DIM> class MultiArrayHostBuffer : public boost::multi_array<T, DIM, HostBufferAllocator<T> > +{ + public: + template <typename ExtentList> MultiArrayHostBuffer(const ExtentList &extents, cl::CommandQueue &queue, cl_mem_flags flags) + : + boost::multi_array<T, DIM, HostBufferAllocator<T> >(extents, boost::c_storage_order(), HostBufferAllocator<T>(queue, flags)) + { + } + + size_t bytesize() const + { + return this->num_elements() * sizeof(T); + } +}; + + +template <typename T> class VectorHostBuffer : public std::vector<T, HostBufferAllocator<T> > +{ + public: + VectorHostBuffer(size_t size, cl::CommandQueue &queue, cl_mem_flags flags) + : + std::vector<T, HostBufferAllocator<T> >(size, T(), HostBufferAllocator<T>(queue, flags)) + { + } +}; + + +#if 0 +template <typename T, std::size_t DIM> class MultiArraySharedBuffer +{ + public: + template <typename ExtentList> MultiArraySharedBuffer(const ExtentList &extents, cl::CommandQueue &queue, cl_mem_flags hostBufferFlags, 
cl_mem_flags deviceBufferFlags) + : + hostBuffer(extents, queue, hostBufferFlags), + deviceBuffer(queue.getInfo<CL_QUEUE_CONTEXT>(), deviceBufferFlags, hostBuffer.num_elements() * sizeof(T)), + queue(queue) + { + } + + void hostToGPU(cl_bool synchronous = CL_FALSE) + { + queue.enqueueWriteBuffer(deviceBuffer, synchronous, 0, hostBuffer.num_elements() * sizeof(T), hostBuffer.origin(), 0, &event); + } + + void GPUtoHost(cl_bool synchronous = CL_FALSE) + { + queue.enqueueReadBuffer(deviceBuffer, synchronous, 0, hostBuffer.num_elements() * sizeof(T), hostBuffer.origin(), 0, &event); + } + + operator cl::Buffer & () + { + return deviceBuffer; + } + + MultiArrayHostBuffer<T, DIM> hostBuffer; + cl::Buffer deviceBuffer; + cl::CommandQueue queue; + cl::Event event; +}; +#else +template <typename T, std::size_t DIM> class MultiArraySharedBuffer : public MultiArrayHostBuffer<T, DIM> +{ + public: + template <typename ExtentList> MultiArraySharedBuffer(const ExtentList &extents, cl::CommandQueue &queue, cl_mem_flags hostBufferFlags, cl_mem_flags deviceBufferFlags) + : + MultiArrayHostBuffer<T, DIM>(extents, queue, hostBufferFlags), + deviceBuffer(queue.getInfo<CL_QUEUE_CONTEXT>(), deviceBufferFlags, this->bytesize()), + queue(queue) + { + } + + template <typename ExtentList> MultiArraySharedBuffer(const ExtentList &extents, cl::CommandQueue &queue, cl_mem_flags hostBufferFlags, cl::Buffer &devBuffer) + : + MultiArrayHostBuffer<T, DIM>(extents, queue, hostBufferFlags), + deviceBuffer(devBuffer), + queue(queue) + { + } + + void hostToDevice(cl_bool synchronous = CL_FALSE) + { + queue.enqueueWriteBuffer(deviceBuffer, synchronous, 0, this->bytesize(), this->origin(), 0, &event); + } + + void deviceToHost(cl_bool synchronous = CL_FALSE) + { + queue.enqueueReadBuffer(deviceBuffer, synchronous, 0, this->bytesize(), this->origin(), 0, &event); + } + + operator cl::Buffer & () + { + return deviceBuffer; + } + + cl::Buffer deviceBuffer; + cl::CommandQueue queue; + cl::Event event; +}; 
+#endif
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/OpenMP_Support.h b/RTCP/GPUProc/src/OpenMP_Support.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd02df475e62d38b51f13812e05558ae8c5e0662
--- /dev/null
+++ b/RTCP/GPUProc/src/OpenMP_Support.h
@@ -0,0 +1,70 @@
+#if !defined OPEN_MP_SUPPORT_H
+#define OPEN_MP_SUPPORT_H
+
+#include <omp.h>
+
+
+// Owns one OpenMP lock: initialised on construction, destroyed on destruction.
+class OMP_Lock
+{
+  public:
+    OMP_Lock()
+    {
+      omp_init_lock(&omp_lock);
+    }
+
+    ~OMP_Lock()
+    {
+      omp_destroy_lock(&omp_lock);
+    }
+
+    // Acquire the lock; omp_set_lock() waits until it is available.
+    void lock()
+    {
+      omp_set_lock(&omp_lock);
+    }
+
+    // Release the lock.
+    void unlock()
+    {
+      omp_unset_lock(&omp_lock);
+    }
+
+  private:
+    // Not copyable: a copy would hold a duplicate of the initialised
+    // omp_lock_t and destroy it a second time.  Declared, never defined.
+    OMP_Lock(const OMP_Lock &);
+    OMP_Lock &operator = (const OMP_Lock &);
+
+    omp_lock_t omp_lock;
+};
+
+
+// Holds an OMP_Lock for the lifetime of the object: locks in the constructor,
+// unlocks in the destructor.
+class OMP_ScopedLock
+{
+  public:
+    // explicit: an OMP_Lock must never silently convert into a guard.
+    explicit OMP_ScopedLock(OMP_Lock &omp_lock)
+    :
+      omp_lock(omp_lock)
+    {
+      omp_lock.lock();
+    }
+
+    ~OMP_ScopedLock()
+    {
+      omp_lock.unlock();
+    }
+
+  private:
+    // Not copyable: each copy would unlock the same lock again.
+    // Declared, never defined.
+    OMP_ScopedLock(const OMP_ScopedLock &);
+    OMP_ScopedLock &operator = (const OMP_ScopedLock &);
+
+    OMP_Lock &omp_lock;
+};
+
+#endif
diff --git a/RTCP/GPUProc/src/RSP.h b/RTCP/GPUProc/src/RSP.h
new file mode 100644
index 0000000000000000000000000000000000000000..20158437fb9af2ef5dc3249bc535e8447f091aae
--- /dev/null
+++ b/RTCP/GPUProc/src/RSP.h
@@ -0,0 +1,53 @@
+//# RSP: RSP data format
+//#
+//# Copyright (C) 2008
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: RSP.h 15169 2010-03-08 15:28:17Z romein $
+
+#ifndef LOFAR_GPUPROC_RSP_H
+#define LOFAR_GPUPROC_RSP_H
+
+#include <cstddef>    // standard headers belong at global scope, never inside a namespace
+#include <stdint.h>   // uint8_t, uint16_t, uint32_t used by RSP::Header
+
+namespace LOFAR {
+namespace RTCP {
+
+// RSP data format: a Header followed by 8130 bytes of data.
+// All data is in Little Endian format!
+
+struct RSP {
+  struct Header {
+    uint8_t  version;
+    uint8_t  sourceInfo;
+    uint16_t configuration;
+    uint16_t station;
+    uint8_t  nrBeamlets;
+    uint8_t  nrBlocks;
+    uint32_t timestamp;
+    uint32_t blockSequenceNumber;
+  } header;
+
+  char data[8130];
+};
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/RTCP.cc b/RTCP/GPUProc/src/RTCP.cc
new file mode 100644
index 0000000000000000000000000000000000000000..029cd1632f9f503a7a9a6b59364db8428c24d722
--- /dev/null
+++ b/RTCP/GPUProc/src/RTCP.cc
@@ -0,0 +1,2040 @@
+#include "lofar_config.h"
+
+#define __CL_ENABLE_EXCEPTIONS
+#include <CL/cl.hpp>
+
+#include <omp.h>
+
+#include <cmath>
+#include <complex>
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <boost/multi_array.hpp>
+
+#include "Align.h"
+#include "BandPass.h"
+#include "Common/lofar_datetime.h"
+#include "Common/LofarLogger.h"
+#include "Common/SystemUtil.h"
+#include "FilterBank.h"
+#include "InputSection.h"
+#include "Interface/Parset.h"
+#include "OpenCL_FFT/clFFT.h"
+#include "OpenCL_Support.h"
+#include "OpenMP_Support.h"
+#include "UHEP/InvertedStationPPFWeights.h"
+//#include "clAmdFft/include/clAmdFft.h"
+
+namespace LOFAR {
+namespace RTCP {
+
+bool profiling = true;
+unsigned nrGPUs;
+
+//#define NR_BITS_PER_SAMPLE 8
+#define NR_POLARIZATIONS 2
+#define NR_TAPS 16
+#define NR_STATION_FILTER_TAPS 16
+
+#define USE_2X2
+#undef USE_CUSTOM_FFT
+#undef USE_TEST_DATA
+#undef USE_B7015
+
+
+double getTime()
+{
+ static double firstTime = 0.0; + +#if defined __linux__ + struct timeval tv; + + if (gettimeofday(&tv, 0) < 0) { + perror("gettimeofday"); + exit(1); + } + + double now = tv.tv_sec + tv.tv_usec / 1e6; +#elif defined _WIN32 || defined __WIN32__ || defined _WIN64 + static LARGE_INTEGER freq; + + if (firstTime == 0 && !QueryPerformanceFrequency(&freq)) + std::cerr << "No high-resolution timer available" << std::endl; + + LARGE_INTEGER time; + QueryPerformanceCounter(&time); + + double now = (double) time.QuadPart / (double) freq.QuadPart; +#endif + + if (firstTime == 0.0) + firstTime = now; + + return now - firstTime; +} + + +class PerformanceCounter +{ + public: + PerformanceCounter(const std::string &name); + ~PerformanceCounter(); + + void doOperation(cl::Event &, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten); + + private: + static void eventCompleteCallBack(cl_event, cl_int /*status*/, void *counter); + + size_t totalNrOperations, totalNrBytesRead, totalNrBytesWritten; + double totalTime; + unsigned totalEvents; + const std::string name; +}; + + +PerformanceCounter::PerformanceCounter(const std::string &name) +: + totalNrOperations(0), + totalNrBytesRead(0), + totalNrBytesWritten(0), + totalTime(0), + totalEvents(0), + name(name) +{ +} + + +PerformanceCounter::~PerformanceCounter() +{ + if (totalTime > 0) +#pragma omp critical (cout) + std::cout << std::setw(12) << name + << std::setprecision(3) + << ": avg. 
time = " << 1000 * totalTime / totalEvents << " ms, "
+                 "GFLOP/s = " << totalNrOperations / totalTime / 1e9 << ", "
+                 "R/W = " << totalNrBytesRead / totalTime / 1e9 << '+'
+              << totalNrBytesWritten / totalTime / 1e9 << '='
+              << (totalNrBytesRead + totalNrBytesWritten) / totalTime / 1e9 << " GB/s"
+              << std::endl;
+}
+
+// Completion callback registered by doOperation(): adds the event's START-to-END time to the PerformanceCounter passed in `counter`.
+void PerformanceCounter::eventCompleteCallBack(cl_event ev, cl_int /*status*/, void *counter)
+{
+  cl::Event event(ev); // adopts ev without retaining it; doOperation() took the matching reference
+
+  cl_ulong queued, submitted, start, stop; // OpenCL reports profiling timestamps as cl_ulong, in nanoseconds
+  event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued);
+  event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &submitted);
+  event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
+  event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop);
+  double seconds = (stop - start) / 1e9;
+
+  if (seconds < 0 || seconds > 15) // implausible duration (stop < start wraps to a huge value): dump the raw timestamps
+#pragma omp critical (cout)
+    std::cout << "BAH! " << omp_get_thread_num() << ": " << queued << ' ' << submitted - queued << ' ' << start - queued << ' ' << stop - queued << std::endl;
+
+#pragma omp atomic
+  static_cast<PerformanceCounter *>(counter)->totalTime += seconds;
+
+  // cl::~Event() decreases ref count
+}
+
+// Accounts one enqueued command (operation and byte counts now, elapsed time on completion); does nothing when `profiling` is off.
+void PerformanceCounter::doOperation(cl::Event &event, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten)
+{
+  if (!profiling)
+    return; // no callback will run, so no reference may be taken: nothing would ever release it
+
+  // eventCompleteCallBack() wraps the raw cl_event in a cl::Event without retaining it, and that wrapper releases one reference when destroyed
+  cl_int error = clRetainEvent(event());
+
+  if (error != CL_SUCCESS)
+    throw cl::Error(error, "clRetainEvent");
+
+  event.setCallback(CL_COMPLETE, &PerformanceCounter::eventCompleteCallBack, this);
+
+#pragma omp atomic
+  totalNrOperations += nrOperations;
+#pragma omp atomic
+  totalNrBytesRead += nrBytesRead;
+#pragma omp atomic
+  totalNrBytesWritten += nrBytesWritten;
+#pragma omp atomic
+  ++ totalEvents;
+}
+
+
+cl::Program createProgram(const Parset &ps, cl::Context &context, std::vector<cl::Device> &devices, const char *sources)
+{
+  std::stringstream args;
+  args << "-cl-fast-relaxed-math";
+
+
std::vector<cl_context_properties> properties; + context.getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "NVIDIA CUDA") { + args << " -cl-nv-verbose"; + args << " -cl-nv-opt-level=99"; + //args << " -cl-nv-maxrregcount=63"; + args << " -DNVIDIA_CUDA"; + } + + //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680") + //args << " -DUSE_FLOAT4_IN_CORRELATOR"; + + args << " -I" << dirname(__FILE__); + args << " -DNR_BITS_PER_SAMPLE=" << ps.nrBitsPerSample(); + args << " -DSUBBAND_BANDWIDTH=" << std::setprecision(7) << ps.subbandBandwidth() << 'f'; + args << " -DNR_SUBBANDS=" << ps.nrSubbands(); + args << " -DNR_CHANNELS=" << ps.nrChannelsPerSubband(); + args << " -DNR_STATIONS=" << ps.nrStations(); + args << " -DNR_SAMPLES_PER_CHANNEL=" << ps.nrSamplesPerChannel(); + args << " -DNR_SAMPLES_PER_SUBBAND=" << ps.nrSamplesPerSubband(); + args << " -DNR_BEAMS=" << ps.nrBeams(); + args << " -DNR_TABS=" << ps.nrTABs(); + args << " -DNR_COHERENT_STOKES=" << ps.nrCoherentStokes(); + args << " -DNR_INCOHERENT_STOKES=" << ps.nrIncoherentStokes(); + args << " -DCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.coherentStokesTimeIntegrationFactor(); + args << " -DINCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.incoherentStokesTimeIntegrationFactor(); + args << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS; + args << " -DNR_TAPS=" << NR_TAPS; + args << " -DNR_STATION_FILTER_TAPS=" << NR_STATION_FILTER_TAPS; + + if (ps.delayCompensation()) + args << " -DDELAY_COMPENSATION"; + + if (ps.correctBandPass()) + args << " -DBANDPASS_CORRECTION"; + + args << " -DDEDISPERSION_FFT_SIZE=" << ps.dedispersionFFTsize(); + return createProgram(context, devices, dirname(__FILE__).append("/").append(sources).c_str(), args.str().c_str()); +} + + +class FFT_Plan +{ + public: + FFT_Plan(cl::Context &context, unsigned fftSize) + { + clFFT_Dim3 dim = { fftSize, 1, 1 }; + cl_int error; + plan = 
clFFT_CreatePlan(context(), dim, clFFT_1D, clFFT_InterleavedComplexFormat, &error); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_CreatePlan"); + + //clFFT_DumpPlan(plan, stdout); + } + + ~FFT_Plan() + { + clFFT_DestroyPlan(plan); + } + + clFFT_Plan plan; +}; + + +class Pipeline +{ + public: + Pipeline(const Parset &); + + cl::Program createProgram(const char *sources); + + const Parset &ps; + cl::Context context; + std::vector<cl::Device> devices; + SmartPtr<InputSection<i16complex> > inputSection16; + SmartPtr<InputSection<i8complex> > inputSection8; + SmartPtr<InputSection<i4complex> > inputSection4; +}; + + +class CorrelatorPipeline : public Pipeline +{ + public: + CorrelatorPipeline(const Parset &); + + void doWork(); + + //private: + //friend class CorrelatorWorkQueue; + + FilterBank filterBank; + + cl::Program firFilterProgram, delayAndBandPassProgram, correlatorProgram; + PerformanceCounter firFilterCounter, delayAndBandPassCounter, correlatorCounter, fftCounter; + PerformanceCounter samplesCounter, visibilitiesCounter; + +#if defined USE_B7015 + OMP_Lock hostToDeviceLock[4], deviceToHostLock[4]; +#endif +}; + + +class BeamFormerPipeline : public Pipeline +{ + public: + BeamFormerPipeline(const Parset &); + + void doWork(); + + cl::Program intToFloatProgram, delayAndBandPassProgram, beamFormerProgram, transposeProgram, dedispersionChirpProgram; + + PerformanceCounter intToFloatCounter, fftCounter, delayAndBandPassCounter, beamFormerCounter, transposeCounter, dedispersionForwardFFTcounter, dedispersionChirpCounter, dedispersionBackwardFFTcounter; + PerformanceCounter samplesCounter; +}; + + +class UHEP_Pipeline : public Pipeline +{ + public: + UHEP_Pipeline(const Parset &); + + void doWork(); + + cl::Program beamFormerProgram, transposeProgram, invFFTprogram, invFIRfilterProgram, triggerProgram; + PerformanceCounter beamFormerCounter, transposeCounter, invFFTcounter, invFIRfilterCounter, triggerCounter; + PerformanceCounter 
beamFormerWeightsCounter, samplesCounter; +}; + + +Pipeline::Pipeline(const Parset &ps) +: + ps(ps)//, + //inputSection16(ps.nrBitsPerSample() == 16 ? new InputSection<i16complex>(ps, 0) : 0), + //inputSection8(ps.nrBitsPerSample() == 8 ? new InputSection<i8complex>(ps, 0) : 0), + //inputSection4(ps.nrBitsPerSample() == 4 ? new InputSection<i4complex>(ps, 0) : 0) +{ + createContext(context, devices); +} + + +cl::Program Pipeline::createProgram(const char *sources) +{ + return LOFAR::RTCP::createProgram(ps, context, devices, sources); +} + + +CorrelatorPipeline::CorrelatorPipeline(const Parset &ps) +: + Pipeline(ps), + filterBank(true, NR_TAPS, ps.nrChannelsPerSubband(), KAISER), + firFilterCounter("FIR filter"), + delayAndBandPassCounter("delay/bp"), + correlatorCounter("correlator"), + fftCounter("FFT"), + samplesCounter("samples"), + visibilitiesCounter("visibilities") +{ + filterBank.negateWeights(); + + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + firFilterProgram = createProgram("FIR.cl"); +#pragma omp section + delayAndBandPassProgram = createProgram("DelayAndBandPass.cl"); +#pragma omp section + correlatorProgram = createProgram("NewCorrelator.cl"); + //correlatorProgram = createProgram("Correlator.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +BeamFormerPipeline::BeamFormerPipeline(const Parset &ps) +: + Pipeline(ps), + intToFloatCounter("int-to-float"), + fftCounter("FFT"), + delayAndBandPassCounter("delay/bp"), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + dedispersionForwardFFTcounter("ddisp.fw.FFT"), + dedispersionChirpCounter("chirp"), + dedispersionBackwardFFTcounter("ddisp.bw.FFT"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + intToFloatProgram = createProgram("BeamFormer/IntToFloat.cl"); +#pragma omp section + delayAndBandPassProgram = 
createProgram("DelayAndBandPass.cl"); +#pragma omp section + beamFormerProgram = createProgram("BeamFormer/BeamFormer.cl"); +#pragma omp section + transposeProgram = createProgram("BeamFormer/Transpose.cl"); +#pragma omp section + dedispersionChirpProgram = createProgram("BeamFormer/Dedispersion.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +UHEP_Pipeline::UHEP_Pipeline(const Parset &ps) +: + Pipeline(ps), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + invFFTcounter("inv. FFT"), + invFIRfilterCounter("inv. FIR"), + triggerCounter("trigger"), + beamFormerWeightsCounter("BF weights"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + beamFormerProgram = createProgram("UHEP/BeamFormer.cl"); +#pragma omp section + transposeProgram = createProgram("UHEP/Transpose.cl"); +#pragma omp section + invFFTprogram = createProgram("UHEP/InvFFT.cl"); +#pragma omp section + invFIRfilterProgram = createProgram("UHEP/InvFIR.cl"); +#pragma omp section + triggerProgram = createProgram("UHEP/Trigger.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +class WorkQueue +{ + public: + WorkQueue(Pipeline &); + + const unsigned gpu; + cl::Device &device; + cl::CommandQueue queue; + + protected: + const Parset &ps; +}; + + +class CorrelatorWorkQueue : public WorkQueue +{ + public: + CorrelatorWorkQueue(CorrelatorPipeline &); + + void doWork(); + +#if defined USE_TEST_DATA + void setTestPattern(); + void printTestOutput(); +#endif + + //private: + CorrelatorPipeline &pipeline; + cl::Buffer devFIRweights; + cl::Buffer devBufferA, devBufferB; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + MultiArraySharedBuffer<char, 4> inputSamples; + + cl::Buffer devFilteredData; + cl::Buffer 
devCorrectedData; + + MultiArraySharedBuffer<std::complex<float>, 4> visibilities; +}; + + +class BeamFormerWorkQueue : public WorkQueue +{ + public: + BeamFormerWorkQueue(BeamFormerPipeline &); + + void doWork(); + + BeamFormerPipeline &pipeline; + + MultiArraySharedBuffer<char, 4> inputSamples; + cl::Buffer devFilteredData; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + cl::Buffer devCorrectedData; + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights; + cl::Buffer devComplexVoltages; + MultiArraySharedBuffer<std::complex<float>, 4> transposedComplexVoltages; + MultiArraySharedBuffer<float, 1> DMs; +}; + + +struct TriggerInfo { + float mean, variance, bestValue; + unsigned bestApproxIndex; +}; + +class UHEP_WorkQueue : public WorkQueue +{ + public: + UHEP_WorkQueue(UHEP_Pipeline &); + + void doWork(const float *delaysAtBegin, const float *delaysAfterEnd, const float *phaseOffsets); + + UHEP_Pipeline &pipeline; + cl::Event inputSamplesEvent, beamFormerWeightsEvent; + + cl::Buffer devBuffers[2]; + cl::Buffer devInputSamples; + MultiArrayHostBuffer<char, 5> hostInputSamples; + + cl::Buffer devBeamFormerWeights; + MultiArrayHostBuffer<std::complex<float>, 3> hostBeamFormerWeights; + + cl::Buffer devComplexVoltages; + cl::Buffer devReverseSubbandMapping; + cl::Buffer devFFTedData; + cl::Buffer devInvFIRfilteredData; + cl::Buffer devInvFIRfilterWeights; + + cl::Buffer devTriggerInfo; + VectorHostBuffer<TriggerInfo> hostTriggerInfo; +}; + + +class Kernel : public cl::Kernel +{ + public: + Kernel(const Parset &ps, cl::Program &program, const char *name) + : + cl::Kernel(program, name), + ps(ps) + { + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { + queue.enqueueNDRangeKernel(*this, cl::NullRange, globalWorkSize, localWorkSize, 0, &event); + counter.doOperation(event, nrOperations, nrBytesRead, 
nrBytesWritten); + } + + protected: + cl::Event event; + const Parset &ps; + cl::NDRange globalWorkSize, localWorkSize; + size_t nrOperations, nrBytesRead, nrBytesWritten; +}; + + +class FIR_FilterKernel : public Kernel +{ + public: + FIR_FilterKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples, cl::Buffer &devFIRweights) + : + Kernel(ps, program, "FIR_filter") + { + setArg(0, devFilteredData); + setArg(1, devInputSamples); + setArg(2, devFIRweights); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned totalNrThreads = ps.nrChannelsPerSubband() * NR_POLARIZATIONS * 2; + unsigned nrPasses = (totalNrThreads + maxNrThreads - 1) / maxNrThreads; + globalWorkSize = cl::NDRange(totalNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(totalNrThreads / nrPasses, 1); + + size_t nrSamples = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * ps.nrSamplesPerChannel() * NR_TAPS * 2 * 2; + nrBytesRead = nrSamples * (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * ps.nrBytesPerComplexSample(); + nrBytesWritten = nrSamples * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +class FFT_Kernel +{ + public: + FFT_Kernel(cl::Context &context, unsigned fftSize, unsigned nrFFTs, bool forward, cl::Buffer &buffer) + : + nrFFTs(nrFFTs), + fftSize(fftSize) +#if defined USE_CUSTOM_FFT + { + ASSERT(fftSize == 256); + ASSERT(forward); + std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); + cl::Program program = createProgram(context, devices, "FFT.cl", ""); + kernel = cl::Kernel(program, "fft0"); + kernel.setArg(0, buffer); + } +#else + , direction(forward ? 
clFFT_Forward : clFFT_Inverse), + plan(context, fftSize), + buffer(buffer) + { + } +#endif + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { +#if defined USE_CUSTOM_FFT + queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(nrFFTs * 64 / 4, 4), cl::NDRange(64, 4), 0, &event); +#else + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); +#endif + + counter.doOperation(event, + (size_t) nrFFTs * 5 * fftSize * log2(fftSize), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>)); + } + + private: + unsigned nrFFTs, fftSize; +#if defined USE_CUSTOM_FFT + cl::Kernel kernel; +#else + clFFT_Direction direction; + FFT_Plan plan; + cl::Buffer &buffer; +#endif + cl::Event event; +}; + + +class Filter_FFT_Kernel : public FFT_Kernel +{ + public: + Filter_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &devFilteredData) + : + FFT_Kernel(context, ps.nrChannelsPerSubband(), ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel(), true, devFilteredData) + { + } +}; + + +class DelayAndBandPassKernel : public Kernel +{ + public: + DelayAndBandPassKernel(const Parset &ps, cl::Program &program, cl::Buffer &devCorrectedData, cl::Buffer &devFilteredData, cl::Buffer &devDelaysAtBegin, cl::Buffer &devDelaysAfterEnd, cl::Buffer &devPhaseOffsets, cl::Buffer &devBandPassCorrectionWeights) + : + Kernel(ps, program, "applyDelaysAndCorrectBandPass") + { + ASSERT(ps.nrChannelsPerSubband() % 16 == 0 || ps.nrChannelsPerSubband() == 1); + ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + + setArg(0, devCorrectedData); + setArg(1, devFilteredData); + setArg(4, devDelaysAtBegin); + setArg(5, devDelaysAfterEnd); + setArg(6, devPhaseOffsets); + setArg(7, devBandPassCorrectionWeights); + + globalWorkSize = cl::NDRange(256, ps.nrChannelsPerSubband() 
== 1 ? 1 : ps.nrChannelsPerSubband() / 16, ps.nrStations()); + localWorkSize = cl::NDRange(256, 1, 1); + + size_t nrSamples = ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + nrOperations = nrSamples * 12; + nrBytesRead = nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, unsigned subband) + { + setArg(2, (float) ps.subbandToFrequencyMapping()[subband]); + setArg(3, 0); // beam + Kernel::enqueue(queue, counter); + } +}; + + +#if 0 + +class CorrelatorKernel : public Kernel +{ + public: + CorrelatorKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devVisibilities, cl::Buffer &devCorrectedData) + : +#if defined USE_4X4 + Kernel(ps, program, "correlate_4x4") +#elif defined USE_3X3 + Kernel(ps, program, "correlate_3x3") +#elif defined USE_2X2 + Kernel(ps, program, "correlate_2x2") +#else + Kernel(ps, program, "correlate") +#endif + { + setArg(0, devVisibilities); + setArg(1, devCorrectedData); + + size_t maxNrThreads, preferredMultiple; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + + std::vector<cl_context_properties> properties; + queue.getInfo<CL_QUEUE_CONTEXT>().getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "AMD Accelerated Parallel Processing") + preferredMultiple = 256; + else + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &preferredMultiple); + +#if defined USE_4X4 + unsigned quartStations = (ps.nrStations() + 2) / 4; + unsigned nrBlocks = quartStations * (quartStations + 1) / 2; +#elif defined USE_3X3 + unsigned thirdStations = (ps.nrStations() + 2) / 3; + unsigned nrBlocks = thirdStations * (thirdStations + 1) / 2; +#elif defined USE_2X2 + unsigned halfStations = (ps.nrStations() + 1) / 2; + unsigned nrBlocks = 
halfStations * (halfStations + 1) / 2; +#else + unsigned nrBlocks = ps.nrBaselines(); +#endif + unsigned nrPasses = (nrBlocks + maxNrThreads - 1) / maxNrThreads; + unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses; + nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple; + //std::cout << "nrBlocks = " << nrBlocks << ", nrPasses = " << nrPasses << ", preferredMultiple = " << preferredMultiple << ", nrThreads = " << nrThreads << std::endl; + + globalWorkSize = cl::NDRange(nrPasses * nrThreads, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrThreads, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrBaselines() * ps.nrSamplesPerChannel() * 32; + nrBytesRead = (size_t) nrPasses * ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrBaselines() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * NR_POLARIZATIONS * sizeof(std::complex<float>); + } +}; + +#else + +class CorrelatorKernel : public Kernel +{ + public: + CorrelatorKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devVisibilities, cl::Buffer &devCorrectedData) + : +#if defined USE_2X2 + Kernel(ps, program, "correlateRectangles") +#else +#error not implemented +#endif + { + setArg(0, devVisibilities); + setArg(1, devCorrectedData); + + unsigned nrRectanglesPerSide = ((ps.nrStations() - 1) / (2 * 16)); + unsigned nrRectangles = nrRectanglesPerSide * (nrRectanglesPerSide + 1) / 2; +#pragma omp critical (cout) + std::cout << "nrRectangles = " << nrRectangles << std::endl; + + globalWorkSize = cl::NDRange(16 * 16, nrRectangles, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(16 * 16, 1, 1); + + nrOperations = (size_t) (32 * 32) * nrRectangles * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * 32; + nrBytesRead = (size_t) (32 + 32) * nrRectangles * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * 
NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) (32 * 32) * nrRectangles * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * NR_POLARIZATIONS * sizeof(std::complex<float>); + } +}; + +#endif + + +class IntToFloatKernel : public Kernel +{ + public: + IntToFloatKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "intToFloat") + { + setArg(0, devFilteredData); + setArg(1, devInputSamples); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + globalWorkSize = cl::NDRange(maxNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(maxNrThreads, 1); + + size_t nrSamples = ps.nrStations() * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * 2; + nrBytesRead = nrSamples * 2 * ps.nrBitsPerSample() / 8; + nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } +}; + + +class IncoherentStokesKernel : public Kernel +{ + public: + IncoherentStokesKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devIncoherentStokes, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "incoherentStokes") + { + setArg(0, devIncoherentStokes); + setArg(1, devInputSamples); + + unsigned nrTimes = ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor(); + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned nrPasses = (nrTimes + maxNrThreads - 1) / maxNrThreads; + unsigned nrTimesPerPass = (nrTimes + nrPasses - 1) / nrPasses; + globalWorkSize = cl::NDRange(nrTimesPerPass * nrPasses, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrTimesPerPass, 1); + + nrOperations = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrStations() * (ps.nrIncoherentStokes() == 1 ? 
8 : 20 + 2.0 / ps.incoherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrIncoherentStokes() * nrTimes * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +class BeamFormerKernel : public Kernel +{ + public: + BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devCorrectedData, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devCorrectedData); + setArg(2, devBeamFormerWeights); + + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + // FIXME: nrTABs + //queue.enqueueNDRangeKernel(*this, cl::NullRange, cl::NDRange(16, ps.nrTABs(), ps.nrChannelsPerSubband()), cl::NDRange(16, ps.nrTABs(), 1), 0, &event); + + size_t count = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytesPerPass = count * ps.nrStations() * sizeof(std::complex<float>); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytesPerPass + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; + } +}; + + +class BeamFormerTransposeKernel : public Kernel +{ + public: + BeamFormerTransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTransposedData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "transposeComplexVoltages") + { + 
ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + setArg(0, devTransposedData); + setArg(1, devComplexVoltages); + + //globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, ps.nrSamplesPerChannel() / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>), + //nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +#if 0 +class Dedispersion_FFT_Kernel +{ + public: + Dedispersion_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + ps(ps), + plan(context, ps.dedispersionFFTsize()), + buffer(buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, clFFT_Direction direction) + { + size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(); + + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); + + counter.doOperation(event, + nrFFTs * 5 * ps.dedispersionFFTsize() * log2(ps.dedispersionFFTsize()), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>)); + } + + private: + const Parset &ps; + FFT_Plan plan; + cl::Buffer &buffer; + cl::Event event; +}; +#else +class DedispersionForwardFFTkernel : public FFT_Kernel +{ + public: + 
DedispersionForwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), true, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; + + +class DedispersionBackwardFFTkernel : public FFT_Kernel +{ + public: + DedispersionBackwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), false, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; +#endif + + +class DedispersionChirpKernel : public Kernel +{ + public: + DedispersionChirpKernel(const Parset &ps, cl::Program &program, cl::CommandQueue &queue, cl::Buffer &buffer, cl::Buffer &DMs) + : + Kernel(ps, program, "applyChirp") + { + setArg(0, buffer); + setArg(1, DMs); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned fftSize = ps.dedispersionFFTsize(); + + globalWorkSize = cl::NDRange(fftSize, ps.nrSamplesPerChannel() / fftSize, ps.nrChannelsPerSubband()); + //std::cout << "globalWorkSize = NDRange(" << fftSize << ", " << ps.nrSamplesPerChannel() / fftSize << ", " << ps.nrChannelsPerSubband() << ')' << std::endl; + + if (fftSize <= maxNrThreads) { + localWorkSize = cl::NDRange(fftSize, 1, maxNrThreads / fftSize); + //std::cout << "localWorkSize = NDRange(" << fftSize << ", 1, " << maxNrThreads / fftSize << ')' << std::endl; + } else { + unsigned divisor; + + for (divisor = 1; fftSize / divisor > maxNrThreads || fftSize % divisor != 0; divisor ++) + ; + + localWorkSize = cl::NDRange(fftSize / divisor, 1, 1); + //std::cout << "localWorkSize = NDRange(" << fftSize / divisor << ", 1, 1))" << std::endl; + } + + 
nrOperations = (size_t) NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * (9 * ps.nrTABs() + 17), + nrBytesRead = nrBytesWritten = sizeof(std::complex<float>) * ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel(); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, double subbandFrequency) + { + setArg(2, (float) subbandFrequency); + Kernel::enqueue(queue, counter); + } +}; + + +class CoherentStokesKernel : public Kernel +{ + public: + CoherentStokesKernel(const Parset &ps, cl::Program &program, cl::Buffer &devStokesData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "coherentStokes") + { + ASSERT(ps.nrChannelsPerSubband() >= 16 && ps.nrChannelsPerSubband() % 16 == 0); + ASSERT(ps.nrCoherentStokes() == 1 || ps.nrCoherentStokes() == 4); + setArg(0, devStokesData); + setArg(1, devComplexVoltages); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * (ps.nrCoherentStokes() == 1 ? 
8 : 20 + 2.0 / ps.coherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * ps.nrCoherentStokes() * ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor() * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +class UHEP_BeamFormerKernel : public Kernel +{ + public: + UHEP_BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devInputSamples, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devInputSamples); + setArg(2, devBeamFormerWeights); + +#if 1 + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrSubbands()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * ps.nrBytesPerComplexSample(); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; +#else + ASSERT(ps.nrTABs() % 3 == 0); + ASSERT(ps.nrStations() % 6 == 0); + unsigned nrThreads = NR_POLARIZATIONS * (ps.nrTABs() / 3) * (ps.nrStations() / 6); + globalWorkSize = cl::NDRange(nrThreads, ps.nrSubbands()); + localWorkSize = cl::NDRange(nrThreads, 1); + //globalWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, ps.nrSubbands()); + //localWorkSize = 
cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * ps.nrBytesPerComplexSample(); + size_t nrComplexVoltagesBytes = count * ps.nrTABs() * sizeof(std::complex<float>); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes; + nrBytesWritten = nrComplexVoltagesBytes; +#endif + } +}; + + +class UHEP_TransposeKernel : public Kernel +{ + public: + UHEP_TransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData, cl::Buffer &devComplexVoltages, cl::Buffer &devReverseSubbandMapping) + : + Kernel(ps, program, "UHEP_Transpose") + { + setArg(0, devFFTedData); + setArg(1, devComplexVoltages); + setArg(2, devReverseSubbandMapping); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, 512 / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + } +}; + + +class UHEP_InvFFT_Kernel : public Kernel +{ + public: + UHEP_InvFFT_Kernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData) + : + Kernel(ps, program, "inv_fft") + { + setArg(0, devFFTedData); + setArg(1, devFFTedData); + + globalWorkSize = cl::NDRange(128, ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel()); + localWorkSize = cl::NDRange(128, 1); + + size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1); + nrOperations = nrFFTs * 5 * 1024 * 10; + 
nrBytesRead = nrFFTs * 512 * sizeof(std::complex<float>); + nrBytesWritten = nrFFTs * 1024 * sizeof(float); + } +}; + + +class UHEP_InvFIR_Kernel : public Kernel +{ + public: + UHEP_InvFIR_Kernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devInvFIRfilteredData, cl::Buffer &devFFTedData, cl::Buffer &devInvFIRfilterWeights) + : + Kernel(ps, program, "invFIRfilter") + { + setArg(0, devInvFIRfilteredData); + setArg(1, devFFTedData); + setArg(2, devInvFIRfilterWeights); + + size_t maxNrThreads, nrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + // round down to nearest power of two + for (nrThreads = 1024; nrThreads > maxNrThreads; nrThreads /= 2) + ; + + globalWorkSize = cl::NDRange(1024, NR_POLARIZATIONS, ps.nrTABs()); + localWorkSize = cl::NDRange(nrThreads, 1, 1); + + size_t count = ps.nrTABs() * NR_POLARIZATIONS * 1024; + nrOperations = count * ps.nrSamplesPerChannel() * NR_STATION_FILTER_TAPS * 2; + nrBytesRead = count * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * sizeof(float); + nrBytesWritten = count * ps.nrSamplesPerChannel() * sizeof(float); + } +}; + + +class UHEP_TriggerKernel : public Kernel +{ + public: + UHEP_TriggerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTriggerInfo, cl::Buffer &devInvFIRfilteredData) + : + Kernel(ps, program, "trigger") + { + setArg(0, devTriggerInfo); + setArg(1, devInvFIRfilteredData); + + globalWorkSize = cl::NDRange(16, 16, ps.nrTABs()); + localWorkSize = cl::NDRange(16, 16, 1); + + nrOperations = (size_t) ps.nrTABs() * ps.nrSamplesPerChannel() * 1024 * (3 /* power */ + 2 /* window */ + 1 /* max */ + 7 /* mean/variance */); + nrBytesRead = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 1024 * sizeof(float); + nrBytesWritten = (size_t) ps.nrTABs() * sizeof(TriggerInfo); + } +}; + + +WorkQueue::WorkQueue(Pipeline &pipeline) +: + gpu(omp_get_thread_num() % nrGPUs), + 
device(pipeline.devices[gpu]), + ps(pipeline.ps) +{ +#if defined __linux__ && defined USE_B7015 + set_affinity(gpu); +#endif + + queue = cl::CommandQueue(pipeline.context, device, profiling ? CL_QUEUE_PROFILING_ENABLE : 0); +} + + +CorrelatorWorkQueue::CorrelatorWorkQueue(CorrelatorPipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + devBufferA(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>)), + devBufferB(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>)), + bandPassCorrectionWeights(boost::extents[ps.nrChannelsPerSubband()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + //inputSamples(boost::extents[ps.nrStations()][(ps.nrSamplesPerChannel() + NR_TAPS - 1) * ps.nrChannelsPerSubband()][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + //visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) + inputSamples(boost::extents[ps.nrStations()][(ps.nrSamplesPerChannel() + NR_TAPS - 1) * ps.nrChannelsPerSubband()][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, devBufferA), + visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, devBufferB) +{ + size_t firWeightsSize = ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float); + devFIRweights = 
cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, firWeightsSize); + queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float), pipeline.filterBank.getWeights().origin()); + +#if 0 + size_t filteredDataSize = ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + devFilteredData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); + devCorrectedData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); +#else + devFilteredData = devBufferB; + devCorrectedData = devBufferA; +#endif + + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +void CorrelatorWorkQueue::doWork() +{ + FIR_FilterKernel firFilterKernel(ps, queue, pipeline.firFilterProgram, devFilteredData, inputSamples, devFIRweights); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + CorrelatorKernel correlatorKernel(ps, queue, pipeline.correlatorProgram, visibilities, devCorrectedData); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! 
+ if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if defined USE_TEST_DATA + if (subband == 0) + setTestPattern(); +#endif + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } + + if (ps.nrChannelsPerSubband() > 1) { + firFilterKernel.enqueue(queue, pipeline.firFilterCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + correlatorKernel.enqueue(queue, pipeline.correlatorCounter); + queue.finish(); + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.deviceToHostLock[gpu / 2]); +#endif + visibilities.deviceToHost(CL_TRUE); + pipeline.visibilitiesCounter.doOperation(visibilities.event, 0, visibilities.bytesize(), 0); + } + +#if defined USE_TEST_DATA + if (subband == 0) + printTestOutput(); +#endif + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +// complexVoltages() +// float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS][NR_POLARIZATIONS]; +// transpose() +// +// float2 (*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// FFT() +// +// applyChrip() +// +// FFT-1() +// float2 
(*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS]; +// computeStokes() +// float (*StokesType)[NR_TABS][NR_STOKES][NR_TIMES_PER_BLOCK / STOKES_INTEGRATION_SAMPLES][NR_CHANNELS]; + + +BeamFormerWorkQueue::BeamFormerWorkQueue(BeamFormerPipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + inputSamples(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devFilteredData(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>)), + bandPassCorrectionWeights(boost::extents[ps.nrChannelsPerSubband()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devCorrectedData(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devComplexVoltages(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + //transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE) + 
transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE), + DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) +{ + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +void BeamFormerWorkQueue::doWork() +{ + //queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, firWeightsSize, firFilterWeights); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + DMs.hostToDevice(CL_TRUE); + + IntToFloatKernel intToFloatKernel(ps, queue, pipeline.intToFloatProgram, devFilteredData, inputSamples); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + BeamFormerKernel beamFormerKernel(ps, pipeline.beamFormerProgram, devComplexVoltages, devCorrectedData, beamFormerWeights); + BeamFormerTransposeKernel transposeKernel(ps, pipeline.transposeProgram, transposedComplexVoltages, devComplexVoltages); + DedispersionForwardFFTkernel dedispersionForwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionBackwardFFTkernel dedispersionBackwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionChirpKernel dedispersionChirpKernel(ps, pipeline.dedispersionChirpProgram, queue, transposedComplexVoltages, DMs); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) + std::cout << "block = " << block << ", 
time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! + if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if 1 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } +#endif + +//#pragma omp critical (GPU) +{ + if (ps.nrChannelsPerSubband() > 1) { + intToFloatKernel.enqueue(queue, pipeline.intToFloatCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + beamFormerKernel.enqueue(queue, pipeline.beamFormerCounter); + transposeKernel.enqueue(queue, pipeline.transposeCounter); + dedispersionForwardFFTkernel.enqueue(queue, pipeline.dedispersionForwardFFTcounter); + dedispersionChirpKernel.enqueue(queue, pipeline.dedispersionChirpCounter, ps.subbandToFrequencyMapping()[subband]); + dedispersionBackwardFFTkernel.enqueue(queue, pipeline.dedispersionBackwardFFTcounter); + + queue.finish(); +} + + //queue.enqueueReadBuffer(devComplexVoltages, CL_TRUE, 0, hostComplexVoltages.bytesize(), hostComplexVoltages.origin()); + //dedispersedData.deviceToHost(CL_TRUE); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + 
exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +UHEP_WorkQueue::UHEP_WorkQueue(UHEP_Pipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + hostInputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY), + hostBeamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY), + hostTriggerInfo(ps.nrTABs(), queue, CL_MEM_READ_ONLY) +{ + size_t inputSamplesSize = ps.nrStations() * ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS * ps.nrBytesPerComplexSample(); + size_t complexVoltagesSize = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t transposedDataSize = ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + size_t invFIRfilteredDataSize = ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 512 * sizeof(std::complex<float>); + + size_t buffer0size = std::max(inputSamplesSize, transposedDataSize); + size_t buffer1size = std::max(complexVoltagesSize, invFIRfilteredDataSize); + + devBuffers[0] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer0size); + devBuffers[1] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer1size); + + size_t beamFormerWeightsSize = ps.nrStations() * ps.nrSubbands() * ps.nrTABs() * sizeof(std::complex<float>); + devBeamFormerWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, beamFormerWeightsSize); + + devInputSamples = devBuffers[0]; + devComplexVoltages = devBuffers[1]; + + devReverseSubbandMapping = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 512 * sizeof(int)); + 
devInvFIRfilterWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 1024 * NR_STATION_FILTER_TAPS * sizeof(float)); + devFFTedData = devBuffers[0]; + devInvFIRfilteredData = devBuffers[1]; + + devTriggerInfo = cl::Buffer(pipeline.context, CL_MEM_WRITE_ONLY, ps.nrTABs() * sizeof(TriggerInfo)); +} + + +void UHEP_WorkQueue::doWork(const float * /*delaysAtBegin*/, const float * /*delaysAfterEnd*/, const float * /*phaseOffsets*/) +{ + UHEP_BeamFormerKernel beamFormer(ps, pipeline.beamFormerProgram, devComplexVoltages, devInputSamples, devBeamFormerWeights); + UHEP_TransposeKernel transpose(ps, pipeline.transposeProgram, devFFTedData, devComplexVoltages, devReverseSubbandMapping); + UHEP_InvFFT_Kernel invFFT(ps, pipeline.invFFTprogram, devFFTedData); + UHEP_InvFIR_Kernel invFIR(ps, queue, pipeline.invFIRfilterProgram, devInvFIRfilteredData, devFFTedData, devInvFIRfilterWeights); + UHEP_TriggerKernel trigger(ps, pipeline.triggerProgram, devTriggerInfo, devInvFIRfilteredData); + double startTime = ps.startTime(), stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + unsigned nrBlocks = (stopTime - startTime) / blockTime; + + queue.enqueueWriteBuffer(devInvFIRfilterWeights, CL_FALSE, 0, sizeof invertedStationPPFWeights, invertedStationPPFWeights); + queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_TRUE, 0, 512 * sizeof(int), reverseSubbandMapping); + +#pragma omp barrier + + double executionStartTime = getTime(); + +#pragma omp for schedule(dynamic) + for (unsigned block = 0; block < nrBlocks; block ++) { + try { + double currentTime = startTime + block * blockTime; + +//#pragma omp single // FIXME: why does the compiler complain here??? 
+#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + +#if 0 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + queue.enqueueWriteBuffer(devInputSamples, CL_TRUE, 0, sampledDataSize, hostInputSamples.origin(), 0, &samplesEvent); + } +#endif + + queue.enqueueWriteBuffer(devBeamFormerWeights, CL_FALSE, 0, hostBeamFormerWeights.bytesize(), hostBeamFormerWeights.origin(), 0, &beamFormerWeightsEvent); + pipeline.beamFormerWeightsCounter.doOperation(beamFormerWeightsEvent, 0, 0, hostBeamFormerWeights.bytesize()); + + queue.enqueueWriteBuffer(devInputSamples, CL_FALSE, 0, hostInputSamples.bytesize(), hostInputSamples.origin(), 0, &inputSamplesEvent); + pipeline.samplesCounter.doOperation(inputSamplesEvent, 0, 0, hostInputSamples.bytesize()); + + beamFormer.enqueue(queue, pipeline.beamFormerCounter); + transpose.enqueue(queue, pipeline.transposeCounter); + invFFT.enqueue(queue, pipeline.invFFTcounter); + invFIR.enqueue(queue, pipeline.invFIRfilterCounter); + trigger.enqueue(queue, pipeline.triggerCounter); + queue.finish(); // necessary to overlap I/O & computations ??? + queue.enqueueReadBuffer(devTriggerInfo, CL_TRUE, 0, hostTriggerInfo.size() * sizeof(TriggerInfo), &hostTriggerInfo[0]); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +#if defined USE_TEST_DATA + +void CorrelatorWorkQueue::setTestPattern() +{ + if (ps.nrStations() >= 3) { + double centerFrequency = 384 * ps.sampleRate(); + double baseFrequency = centerFrequency - .5 * ps.sampleRate(); + unsigned testSignalChannel = ps.nrChannelsPerSubband() >= 231 ? 
230 : ps.nrChannelsPerSubband() / 2; + double signalFrequency = baseFrequency + testSignalChannel * ps.sampleRate() / ps.nrChannelsPerSubband(); + + for (unsigned time = 0; time < (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * ps.nrChannelsPerSubband(); time ++) { + double phi = 2.0 * M_PI * signalFrequency * time / ps.sampleRate(); + + switch (ps.nrBytesPerComplexSample()) { + case 4 : reinterpret_cast<std::complex<short> &>(hostInputSamples[2][time][1][0]) = std::complex<short>((short) rint(32767 * cos(phi)), (short) rint(32767 * sin(phi))); + break; + + case 2 : reinterpret_cast<std::complex<signed char> &>(hostInputSamples[2][time][1][0]) = std::complex<signed char>((signed char) rint(127 * cos(phi)), (signed char) rint(127 * sin(phi))); + break; + } + } + } +} + + +void CorrelatorWorkQueue::printTestOutput() +{ + if (ps.nrBaselines() >= 6) +#pragma omp critical (cout) + { + std::cout << "newgraph newcurve linetype solid pts" << std::endl; + + //for (int channel = 0; channel < ps.nrChannelsPerSubband(); channel ++) + if (ps.nrChannelsPerSubband() == 256) + for (int channel = 228; channel <= 232; channel ++) + std::cout << channel << ' ' << hostVisibilities[5][channel][1][1] << std::endl; + } +} + +#endif + + +void CorrelatorPipeline::doWork() +{ +#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs) + try + { + CorrelatorWorkQueue(*this).doWork(); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +void BeamFormerPipeline::doWork() +{ +#pragma omp parallel num_threads((profiling ? 
1 : 2) * nrGPUs) + try + { + BeamFormerWorkQueue(*this).doWork(); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +void UHEP_Pipeline::doWork() +{ + float delaysAtBegin[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + float delaysAfterEnd[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + float phaseOffsets[ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + + memset(delaysAtBegin, 0, sizeof delaysAtBegin); + memset(delaysAfterEnd, 0, sizeof delaysAfterEnd); + memset(phaseOffsets, 0, sizeof phaseOffsets); + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + +#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs) + try + { + UHEP_WorkQueue(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +class UnitTest +{ + protected: + UnitTest(const Parset &ps, const char *programName = 0) + : + counter(programName != 0 ? 
programName : "test") + { + createContext(context, devices); + queue = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE); + + if (programName != 0) + program = createProgram(ps, context, devices, programName); + } + + template <typename T> void check(T actual, T expected) + { + if (expected != actual) { + std::cerr << "Test FAILED: expected " << expected << ", computed " << actual << std::endl; + exit(1); + } else { + std::cout << "Test OK" << std::endl; + } + } + + cl::Context context; + std::vector<cl::Device> devices; + cl::Program program; + cl::CommandQueue queue; + + PerformanceCounter counter; +}; + + +struct CorrelatorTest : public UnitTest +{ + CorrelatorTest(const Parset &ps) + : + //UnitTest(ps, "Correlator.cl") + UnitTest(ps, "NewCorrelator.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 6 && ps.nrSamplesPerChannel() >= 100) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + CorrelatorKernel correlator(ps, queue, program, visibilities, inputData); + + //inputData[3][5][99][1] = std::complex<float>(3, 4); + //inputData[4][5][99][1] = std::complex<float>(5, 6); + inputData[2][5][99][1] = std::complex<float>(3, 4); + inputData[65][5][99][1] = std::complex<float>(5, 6); + +visibilities.hostToDevice(CL_FALSE); + inputData.hostToDevice(CL_FALSE); + correlator.enqueue(queue, counter); + visibilities.deviceToHost(CL_TRUE); + + //check(visibilities[13][5][1][1], std::complex<float>(39, 2)); + //check(visibilities[5463][5][1][1], std::complex<float>(39, 2)); + for (unsigned bl = 0; bl < ps.nrBaselines(); bl ++) + if (visibilities[bl][5][1][1] != std::complex<float>(0, 
0)) + std::cout << "bl = " << bl << ", visibility = " << visibilities[bl][5][1][1] << std::endl; + } + } +}; + + +struct IncoherentStokesTest : public UnitTest +{ + IncoherentStokesTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IncoherentStokes.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 14 && ps.nrSamplesPerChannel() >= 108) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<float, 3> stokesData(boost::extents[ps.nrIncoherentStokes()][ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + IncoherentStokesKernel kernel(ps, queue, program, stokesData, inputData); + + inputData[4][13][107][0] = std::complex<float>(2, 3); + inputData[4][13][107][1] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + stokesData.deviceToHost(CL_TRUE); + + const static float expected[] = { 54, -28, 46, 4 }; + + for (unsigned stokes = 0; stokes < ps.nrIncoherentStokes(); stokes ++) + check(stokesData[stokes][107 / ps.incoherentStokesTimeIntegrationFactor()][13], expected[stokes]); + } + } +}; + + +struct IntToFloatTest : public UnitTest +{ + IntToFloatTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IntToFloat.cl") + { + if (ps.nrStations() >= 3 && ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() >= 10077) { + MultiArraySharedBuffer<char, 4> inputData(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> outputData(boost::extents[ps.nrStations()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, 
CL_MEM_WRITE_ONLY); + IntToFloatKernel kernel(ps, queue, program, outputData, inputData); + + switch (ps.nrBytesPerComplexSample()) { + case 4 : reinterpret_cast<std::complex<short> &>(inputData[2][10076][1][0]) = 7; + break; + + case 2 : reinterpret_cast<std::complex<signed char> &>(inputData[2][10076][1][0]) = 7; + break; + + case 1 : reinterpret_cast<i4complex &>(inputData[2][10076][1][0]) = i4complex(7, 0); + break; + } + + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + outputData.deviceToHost(CL_TRUE); + check(outputData[2][1][10076], std::complex<float>(7.0f, 0)); + } + } +}; + + +struct BeamFormerTest : public UnitTest +{ + BeamFormerTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/BeamFormer.cl") + { + if (ps.nrStations() >= 5 && ps.nrSamplesPerChannel() >= 13 && ps.nrChannelsPerSubband() >= 7 && ps.nrTABs() >= 6) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE); + BeamFormerKernel beamFormer(ps, program, complexVoltages, inputData, beamFormerWeights); + + inputData[4][6][12][1] = std::complex<float>(2.2, 3); + beamFormerWeights[4][6][5] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + beamFormer.enqueue(queue, counter); + complexVoltages.deviceToHost(CL_TRUE); + + check(complexVoltages[6][12][5][1], std::complex<float>(-6.2, 23)); + +#if 0 + for (unsigned tab = 0; tab < ps.nrTABs(); tab ++) + for (unsigned pol = 0; pol < 
NR_POLARIZATIONS; pol ++) + for (unsigned ch = 0; ch < ps.nrChannelsPerSubband(); ch ++) + for (unsigned t = 0; t < ps.nrSamplesPerChannel(); t ++) + if (complexVoltages[tab][pol][ch][t] != std::complex<float>(0, 0)) + std::cout << "complexVoltages[" << tab << "][" << pol << "][" << ch << "][" << t << "] = " << complexVoltages[tab][pol][ch][t] << std::endl; +#endif + } + } +}; + + +struct BeamFormerTransposeTest : public UnitTest +{ + BeamFormerTransposeTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Transpose.cl") + { + if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + BeamFormerTransposeKernel transpose(ps, program, transposedData, complexVoltages); + + complexVoltages[18][174][4][1] = std::complex<float>(24, 42); + + complexVoltages.hostToDevice(CL_FALSE); + transpose.enqueue(queue, counter); + transposedData.deviceToHost(CL_TRUE); + + check(transposedData[4][1][174][18], std::complex<float>(24, 42)); + } + } +}; + + +struct DedispersionChirpTest : public UnitTest +{ + DedispersionChirpTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Dedispersion.cl") + { + if (ps.nrTABs() > 3 && ps.nrChannelsPerSubband() > 13 && ps.nrSamplesPerChannel() / ps.dedispersionFFTsize() > 1 && ps.dedispersionFFTsize() > 77) { + MultiArraySharedBuffer<std::complex<float>, 5> data(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel() / ps.dedispersionFFTsize()][ps.dedispersionFFTsize()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_WRITE); + MultiArraySharedBuffer<float, 1> 
DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + DedispersionChirpKernel dedispersionChirpKernel(ps, program, queue, data, DMs); + + data[3][1][13][1][77] = std::complex<float>(2, 3); + DMs[3] = 2; + + DMs.hostToDevice(CL_FALSE); + data.hostToDevice(CL_FALSE); + dedispersionChirpKernel.enqueue(queue, counter, 60e6); + data.deviceToHost(CL_TRUE); + + std::cout << data[3][1][13][1][77] << std::endl; + } + } +}; + + +struct CoherentStokesTest : public UnitTest +{ + CoherentStokesTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/CoherentStokes.cl") + { + if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<float, 4> stokesData(boost::extents[ps.nrTABs()][ps.nrCoherentStokes()][ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); +#if 1 + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages); + + complexVoltages[18][174][4][0] = std::complex<float>(2, 3); + complexVoltages[18][174][4][1] = std::complex<float>(4, 5); +#else + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages); + + complexVoltages[18][174][4][0] = std::complex<float>(2, 3); + complexVoltages[18][174][4][1] = std::complex<float>(4, 5); +#endif + + complexVoltages.hostToDevice(CL_FALSE); + stokesKernel.enqueue(queue, counter); + stokesData.deviceToHost(CL_TRUE); + + for (unsigned stokes = 0; stokes < ps.nrCoherentStokes(); stokes ++) + std::cout << 
stokesData[4][stokes][174 / ps.coherentStokesTimeIntegrationFactor()][18] << std::endl; + } + } +}; + + +struct UHEP_BeamFormerTest : public UnitTest +{ + UHEP_BeamFormerTest(const Parset &ps) + : + UnitTest(ps, "UHEP/BeamFormer.cl") + { + if (ps.nrStations() >= 5 && (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) >= 13 && ps.nrSubbands() >= 7 && ps.nrTABs() >= 6) { + MultiArraySharedBuffer<char, 5> inputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE); + UHEP_BeamFormerKernel beamFormer(ps, program, complexVoltages, inputSamples, beamFormerWeights); + + switch (ps.nrBytesPerComplexSample()) { + case 4 : reinterpret_cast<std::complex<short> &>(inputSamples[4][6][12][1][0]) = std::complex<short>(2, 3); + break; + + case 2 : reinterpret_cast<std::complex<signed char> &>(inputSamples[4][6][12][1][0]) = std::complex<signed char>(2, 3); + break; + + case 1 : reinterpret_cast<i4complex &>(inputSamples[4][6][12][1][0]) = i4complex(2, 3); + break; + } + + beamFormerWeights[4][6][5] = std::complex<float>(4, 5); + + inputSamples.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + beamFormer.enqueue(queue, counter); + complexVoltages.deviceToHost(CL_TRUE); + + check(complexVoltages[6][12][5][1], std::complex<float>(-7, 22)); + } + } +}; + + +struct UHEP_TransposeTest : public UnitTest +{ + UHEP_TransposeTest(const Parset &ps) + : + UnitTest(ps, "UHEP/Transpose.cl") + { + if (ps.nrSubbands() >= 19 && 
ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][512], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + cl::Buffer devReverseSubbandMapping(context, CL_MEM_READ_ONLY, 512 * sizeof(int)); + UHEP_TransposeKernel transpose(ps, program, transposedData, complexVoltages, devReverseSubbandMapping); + + complexVoltages[18][174][4][1] = std::complex<float>(24, 42); + + queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_FALSE, 0, 512 * sizeof(int), reverseSubbandMapping); + complexVoltages.hostToDevice(CL_FALSE); + transpose.enqueue(queue, counter); + transposedData.deviceToHost(CL_TRUE); + + check(transposedData[4][1][174][38], std::complex<float>(24, 42)); + } + } +}; + + +struct UHEP_TriggerTest : public UnitTest +{ + UHEP_TriggerTest(const Parset &ps) + : + UnitTest(ps, "UHEP/Trigger.cl") + { + if (ps.nrTABs() >= 4 && 1024 * ps.nrSamplesPerChannel() > 100015) { + MultiArraySharedBuffer<float, 3> inputData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * 1024], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<TriggerInfo, 1> triggerInfo(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + UHEP_TriggerKernel trigger(ps, program, triggerInfo, inputData); + + inputData[3][1][100015] = 1000; + + inputData.hostToDevice(CL_FALSE); + trigger.enqueue(queue, counter); + triggerInfo.deviceToHost(CL_TRUE); + + std::cout << "trigger info: mean = " << triggerInfo[3].mean << ", variance = " << triggerInfo[3].variance << ", bestValue = " << triggerInfo[3].bestValue << ", bestApproxIndex = " << 
triggerInfo[3].bestApproxIndex << std::endl; + //check(triggerInfo[3].mean, (float) (1000.0f * 1000.0f) / (float) (ps.nrSamplesPerChannel() * 1024)); + check(triggerInfo[3].bestValue, 1000.0f * 1000.0f); + check(triggerInfo[3].bestApproxIndex, 100016U); + } + } +}; + + +#if 0 +struct FFT_Test : public UnitTest +{ + FFT_Test(const Parset &ps) + : UnitTest(ps, "fft.cl") + { + MultiArraySharedBuffer<std::complex<float>, 1> in(boost::extents[8], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 1> out(boost::extents[8], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + + for (unsigned i = 0; i < 8; i ++) + in[i] = std::complex<float>(2 * i + 1, 2 * i + 2); + + clAmdFftSetupData setupData; + cl::detail::errHandler(clAmdFftInitSetupData(&setupData), "clAmdFftInitSetupData"); + setupData.debugFlags = CLFFT_DUMP_PROGRAMS; + cl::detail::errHandler(clAmdFftSetup(&setupData), "clAmdFftSetup"); + + clAmdFftPlanHandle plan; + size_t dim[1] = { 8 }; + + cl::detail::errHandler(clAmdFftCreateDefaultPlan(&plan, context(), CLFFT_1D, dim), "clAmdFftCreateDefaultPlan"); + cl::detail::errHandler(clAmdFftSetResultLocation(plan, CLFFT_OUTOFPLACE), "clAmdFftSetResultLocation"); + cl::detail::errHandler(clAmdFftSetPlanBatchSize(plan, 1), "clAmdFftSetPlanBatchSize"); + cl::detail::errHandler(clAmdFftBakePlan(plan, 1, &queue(), 0, 0), "clAmdFftBakePlan"); + + in.hostToDevice(CL_FALSE); + cl_mem ins[1] = { ((cl::Buffer) in)() }; + cl_mem outs[1] = { ((cl::Buffer) out)() }; +#if 1 + cl::detail::errHandler(clAmdFftEnqueueTransform(plan, CLFFT_FORWARD, 1, &queue(), 0, 0, 0, ins, outs, 0), "clAmdFftEnqueueTransform"); +#else + cl::Kernel kernel(program, "fft_fwd"); + kernel.setArg(0, (cl::Buffer) in); + kernel.setArg(1, (cl::Buffer) out); + queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(64, 1, 1), cl::NDRange(64, 1, 1)); +#endif + out.deviceToHost(CL_TRUE); + + for (unsigned i = 0; i < 8; i ++) + std::cout << out[i] << std::endl; + + 
cl::detail::errHandler(clAmdFftDestroyPlan(&plan), "clAmdFftDestroyPlan"); + cl::detail::errHandler(clAmdFftTeardown(), "clAmdFftTeardown"); + } +}; +#endif + + +} // namespace RTCP +} // namespace LOFAR + +int main(int argc, char **argv) +{ + using namespace LOFAR::RTCP; + + std::cout << "running ..." << std::endl; + + if (setenv("DISPLAY", ":0.0", 1) < 0) { + perror("error setting DISPLAY"); + exit(1); + } + + if (argc != 2) { + std::cerr << "usage: " << argv[0] << " parset" << std::endl; + exit(1); + } + +#if 0 && defined __linux__ + set_affinity(0); +#endif + + try { + Parset ps(argv[1]); + +#if 0 + const char *str = getenv("NR_STATIONS"); + ps.nrStations() = str ? atoi(str) : 77; +#endif + std::cout << "nr stations = " << ps.nrStations() << std::endl; + + const char *str = getenv("NR_GPUS"); + nrGPUs = str ? atoi(str) : 1; + +#if 0 + ps.nrSubbands() = 10;//488; + ps.nrChannelsPerSubband() = 64; + ps.nrBeams() = 1; + ps.nrSamplesPerChannel() = 196608 / ps.nrChannelsPerSubband(); + ps.subbandBandwidth() = 195312.5; + ps.correctBandPass() = true; +#endif + + //profiling = false; CorrelatorPipeline(ps).doWork(); + //profiling = true; CorrelatorPipeline(ps).doWork(); + + (CorrelatorTest)(ps); + +#if 0 + ps.nrSubbands() = 488; + ps.nrChannelsPerSubband() = 2048; + ps.nrBeams() = 1; + ps.nrTABs() = 128; + ps.nrIncoherentStokes() = 4; + ps.nrCoherentStokes() = 4; + ps.incoherentStokesTimeIntegrationFactor() = 8; + ps.coherentStokesTimeIntegrationFactor() = 8; + ps.nrSamplesPerChannel() = 65536 / ps.nrChannelsPerSubband();//262144 / ps.nrChannelsPerSubband(); + ps.subbandBandwidth() = 195312.5; + ps.correctBandPass() = true; + ps.dedispersionFFTsize() = ps.nrSamplesPerChannel(); + + profiling = false; BeamFormerPipeline(ps).doWork(); + profiling = true; BeamFormerPipeline(ps).doWork(); + //(IncoherentStokesTest)(ps); + //(IntToFloatTest)(ps); + //(BeamFormerTest)(ps); + //(BeamFormerTransposeTest)(ps); + //(DedispersionChirpTest)(ps); + //(CoherentStokesTest)(ps); 
+#endif + +#if 0 + ps.nrSubbands() = 488; + ps.nrSamplesPerChannel() = 1024; + ps.nrBeams() = 1; + ps.subbandBandwidth() = 195312.5; + ps.nrTABs() = 48; + + profiling = false; UHEP_Pipeline(ps).doWork(); + profiling = true; UHEP_Pipeline(ps).doWork(); + //(UHEP_BeamFormerTest)(ps); + //(UHEP_TransposeTest)(ps); + //(UHEP_TriggerTest)(ps); +#endif + +#if 0 + (FFT_Test)(ps); +#endif + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + + return 0; +} diff --git a/RTCP/GPUProc/src/RTCP.cc.not b/RTCP/GPUProc/src/RTCP.cc.not new file mode 100644 index 0000000000000000000000000000000000000000..3673b79a903558e9e9cd0eb21e1162929059144f --- /dev/null +++ b/RTCP/GPUProc/src/RTCP.cc.not @@ -0,0 +1,2020 @@ +#include "lofar_config.h" + +#define __CL_ENABLE_EXCEPTIONS +#include <CL/cl.hpp> + +#include <omp.h> + +#include <cmath> +#include <complex> +#include <cstdio> +#include <cstdlib> +#include <fstream> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <boost/multi_array.hpp> + +#include "Align.h" +#include "BandPass.h" +#include "BeamletBuffer.h" +#include "Common.h" +#include "FilterBank.h" +#include "Common/lofar_datetime.h" +#include "Common/LofarLogger.h" +#include "Common/SystemUtil.h" +#include "Interface/Parset.h" +#include "OpenCL_FFT/clFFT.h" +#include "OpenCL_Support.h" +#include "OpenMP_Support.h" +#include "UHEP/InvertedStationPPFWeights.h" +//#include "clAmdFft/include/clAmdFft.h" + +namespace LOFAR { +namespace RTCP { + +bool profiling = true; +unsigned nrGPUs; + +//#define NR_BITS_PER_SAMPLE 8 +#define NR_POLARIZATIONS 2 +#define NR_TAPS 16 +#define NR_STATION_FILTER_TAPS 16 + +#define USE_2X2 +#undef USE_CUSTOM_FFT +#undef USE_TEST_DATA +#undef USE_B7015 + +#if 0 +#if NR_BITS_PER_SAMPLE == 16 +typedef std::complex<short> SampleType; +#elif NR_BITS_PER_SAMPLE == 8 +typedef std::complex<signed char> SampleType; 
+#endif +#endif + + +#if 0 + +class Parset +{ + public: + unsigned nrStations() const { return _nrStations; } + unsigned nrBaselines() const { return nrStations() * (nrStations() + 1) / 2; } + unsigned nrSubbands() const { return _nrSubbands; } + float subbandBandwidth() const { return _subbandBandwidth; } + unsigned nrChannelsPerSubband() const { return _nrChannelsPerSubband; } + unsigned nrBeams() const { return _nrBeams; } + unsigned nrSamplesPerChannel() const { return _nrSamplesPerChannel; } + bool correctBandPass() const { return _correctBandPass; } + unsigned nrTABs() const { return _nrTABs; } + unsigned nrCoherentStokes() const { return _nrCoherentStokes; } + unsigned nrIncoherentStokes() const { return _nrIncoherentStokes; } + unsigned coherentStokesTimeIntegrationFactor() const { return _coherentStokesTimeIntegrationFactor; } + unsigned incoherentStokesTimeIntegrationFactor() const { return _incoherentStokesTimeIntegrationFactor; } + unsigned dedispersionFFTsize() const { return _dedispersionFFTsize; } + + unsigned &nrStations() { return _nrStations; } + unsigned &nrSubbands() { return _nrSubbands; } + float &subbandBandwidth() { return _subbandBandwidth; } + unsigned &nrChannelsPerSubband() { return _nrChannelsPerSubband; } + unsigned &nrBeams() { return _nrBeams; } + unsigned &nrSamplesPerChannel() { return _nrSamplesPerChannel; } + bool &correctBandPass() { return _correctBandPass; } + unsigned &nrTABs() { return _nrTABs; } + unsigned &nrCoherentStokes() { return _nrCoherentStokes; } + unsigned &nrIncoherentStokes() { return _nrIncoherentStokes; } + unsigned &coherentStokesTimeIntegrationFactor() { return _coherentStokesTimeIntegrationFactor; } + unsigned &incoherentStokesTimeIntegrationFactor() { return _incoherentStokesTimeIntegrationFactor; } + unsigned &dedispersionFFTsize() { return _dedispersionFFTsize; } + + private: + unsigned _nrStations; + unsigned _nrSubbands; + float _subbandBandwidth; + unsigned _nrChannelsPerSubband; + unsigned _nrBeams; 
+ unsigned _nrSamplesPerChannel; + bool _correctBandPass; + unsigned _nrTABs; + unsigned _nrCoherentStokes; + unsigned _nrIncoherentStokes; + unsigned _coherentStokesTimeIntegrationFactor; + unsigned _incoherentStokesTimeIntegrationFactor; + unsigned _dedispersionFFTsize; +}; + +#endif + + +class PerformanceCounter +{ + public: + PerformanceCounter(const std::string &name); + ~PerformanceCounter(); + + void doOperation(cl::Event &, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten); + + private: + static void eventCompleteCallBack(cl_event, cl_int /*status*/, void *counter); + + size_t totalNrOperations, totalNrBytesRead, totalNrBytesWritten; + double totalTime; + unsigned totalEvents; + const std::string name; +}; + + +PerformanceCounter::PerformanceCounter(const std::string &name) +: + totalNrOperations(0), + totalNrBytesRead(0), + totalNrBytesWritten(0), + totalTime(0), + totalEvents(0), + name(name) +{ +} + + +PerformanceCounter::~PerformanceCounter() +{ + if (totalTime > 0) +#pragma omp critical (cout) + std::cout << std::setw(12) << name + << std::setprecision(3) + << ": avg. time = " << 1000 * totalTime / totalEvents << " ms, " + "GFLOP/s = " << totalNrOperations / totalTime / 1e9 << ", " + "R/W = " << totalNrBytesRead / totalTime / 1e9 << '+' + << totalNrBytesWritten / totalTime / 1e9 << '=' + << (totalNrBytesRead + totalNrBytesWritten) / totalTime / 1e9 << " GB/s" + << std::endl; +} + + +void PerformanceCounter::eventCompleteCallBack(cl_event ev, cl_int /*status*/, void *counter) +{ + cl::Event event(ev); + + size_t queued, submitted, start, stop; + event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &submitted); + event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop); + double seconds = (stop - start) / 1e9; + + if (seconds < 0 || seconds > 15) +#pragma omp critical (cout) + std::cout << "BAH! 
" << omp_get_thread_num() << ": " << queued << ' ' << submitted - queued << ' ' << start - queued << ' ' << stop - queued << std::endl; + +#pragma omp atomic + static_cast<PerformanceCounter *>(counter)->totalTime += seconds; + + // cl::~Event() decreases ref count +} + + +void PerformanceCounter::doOperation(cl::Event &event, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten) +{ + // reference count between C and C++ conversions is serously broken in C++ wrapper + cl_event ev = event(); + cl_int error = clRetainEvent(ev); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clRetainEvent"); + + if (profiling) { + event.setCallback(CL_COMPLETE, &PerformanceCounter::eventCompleteCallBack, this); + +#pragma omp atomic + totalNrOperations += nrOperations; +#pragma omp atomic + totalNrBytesRead += nrBytesRead; +#pragma omp atomic + totalNrBytesWritten += nrBytesWritten; +#pragma omp atomic + ++ totalEvents; + } +} + + +cl::Program createProgram(const Parset &ps, cl::Context &context, std::vector<cl::Device> &devices, const char *sources) +{ + std::stringstream args; + args << "-cl-fast-relaxed-math"; + + std::vector<cl_context_properties> properties; + context.getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "NVIDIA CUDA") { + args << " -cl-nv-verbose"; + args << " -cl-nv-opt-level=99"; + //args << " -cl-nv-maxrregcount=63"; + args << " -DNVIDIA_CUDA"; + } + + //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680") + //args << " -DUSE_FLOAT4_IN_CORRELATOR"; + + args << " -I" << dirname(__FILE__); + args << " -DNR_BITS_PER_SAMPLE=" << ps.nrBitsPerSample(); + args << " -DSUBBAND_BANDWIDTH=" << std::setprecision(7) << ps.subbandBandwidth() << 'f'; + args << " -DNR_SUBBANDS=" << ps.nrSubbands(); + args << " -DNR_CHANNELS=" << ps.nrChannelsPerSubband(); + args << " -DNR_STATIONS=" << ps.nrStations(); + args << " -DNR_SAMPLES_PER_CHANNEL=" << ps.nrSamplesPerChannel(); + 
args << " -DNR_SAMPLES_PER_SUBBAND=" << ps.nrSamplesPerSubband(); + args << " -DNR_BEAMS=" << ps.nrBeams(); + args << " -DNR_TABS=" << ps.nrTABs(); + args << " -DNR_COHERENT_STOKES=" << ps.nrCoherentStokes(); + args << " -DNR_INCOHERENT_STOKES=" << ps.nrIncoherentStokes(); + args << " -DCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.coherentStokesTimeIntegrationFactor(); + args << " -DINCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.incoherentStokesTimeIntegrationFactor(); + args << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS; + args << " -DNR_TAPS=" << NR_TAPS; + args << " -DNR_STATION_FILTER_TAPS=" << NR_STATION_FILTER_TAPS; + + if (ps.delayCompensation()) + args << " -DDELAY_COMPENSATION"; + + if (ps.correctBandPass()) + args << " -DBANDPASS_CORRECTION"; + + args << " -DDEDISPERSION_FFT_SIZE=" << ps.dedispersionFFTsize(); + return createProgram(context, devices, sources, args.str().c_str()); +} + + +class FFT_Plan +{ + public: + FFT_Plan(cl::Context &context, unsigned fftSize) + { + clFFT_Dim3 dim = { fftSize, 1, 1 }; + cl_int error; + plan = clFFT_CreatePlan(context(), dim, clFFT_1D, clFFT_InterleavedComplexFormat, &error); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_CreatePlan"); + + //clFFT_DumpPlan(plan, stdout); + } + + ~FFT_Plan() + { + clFFT_DestroyPlan(plan); + } + + clFFT_Plan plan; +}; + + +template <typename SampleType> class Pipeline +{ + public: + Pipeline(const Parset &); + + cl::Program createProgram(const char *sources); + + const Parset &ps; + cl::Context context; + std::vector<cl::Device> devices; +}; + + +template <typename SampleType> class CorrelatorPipeline : public Pipeline<SampleType> +{ + public: + CorrelatorPipeline(const Parset &); + + void doWork(); + + //private: + //friend class CorrelatorWorkQueue; + + FilterBank filterBank; + + cl::Program firFilterProgram, delayAndBandPassProgram, correlatorProgram; + PerformanceCounter firFilterCounter, delayAndBandPassCounter, correlatorCounter, fftCounter; + 
PerformanceCounter samplesCounter, visibilitiesCounter; + +#if defined USE_B7015 + OMP_Lock hostToDeviceLock[4], deviceToHostLock[4]; +#endif +}; + + +template <typename SampleType> class BeamFormerPipeline : public Pipeline<SampleType> +{ + public: + BeamFormerPipeline(const Parset &); + + void doWork(); + + cl::Program intToFloatProgram, delayAndBandPassProgram, beamFormerProgram, transposeProgram, dedispersionChirpProgram; + + PerformanceCounter intToFloatCounter, fftCounter, delayAndBandPassCounter, beamFormerCounter, transposeCounter, dedispersionForwardFFTcounter, dedispersionChirpCounter, dedispersionBackwardFFTcounter; + PerformanceCounter samplesCounter; +}; + + +template <typename SampleType> class UHEP_Pipeline : public Pipeline<SampleType> +{ + public: + UHEP_Pipeline(const Parset &); + + void doWork(); + + cl::Program beamFormerProgram, transposeProgram, invFFTprogram, invFIRfilterProgram, triggerProgram; + PerformanceCounter beamFormerCounter, transposeCounter, invFFTcounter, invFIRfilterCounter, triggerCounter; + PerformanceCounter beamFormerWeightsCounter, samplesCounter; +}; + + +template <typename SampleType> Pipeline<SampleType>::Pipeline(const Parset &ps) +: + ps(ps) +{ + createContext(context, devices); +} + + +template <typename SampleType> cl::Program Pipeline<SampleType>::createProgram(const char *sources) +{ + return LOFAR::RTCP::createProgram(ps, context, devices, sources); +} + + +template <typename SampleType> CorrelatorPipeline<SampleType>::CorrelatorPipeline(const Parset &ps) +: + Pipeline<SampleType>(ps), + filterBank(true, NR_TAPS, ps.nrChannelsPerSubband(), KAISER), + firFilterCounter("FIR filter"), + delayAndBandPassCounter("delay/bp"), + correlatorCounter("correlator"), + fftCounter("FFT"), + samplesCounter("samples"), + visibilitiesCounter("visibilities") +{ + filterBank.negateWeights(); + + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + firFilterProgram = this->createProgram("FIR.cl"); 
+#pragma omp section + delayAndBandPassProgram = this->createProgram("DelayAndBandPass.cl"); +#pragma omp section + correlatorProgram = this->createProgram("Correlator.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +template <typename SampleType> BeamFormerPipeline<SampleType>::BeamFormerPipeline(const Parset &ps) +: + Pipeline<SampleType>(ps), + intToFloatCounter("int-to-float"), + fftCounter("FFT"), + delayAndBandPassCounter("delay/bp"), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + dedispersionForwardFFTcounter("ddisp.fw.FFT"), + dedispersionChirpCounter("chirp"), + dedispersionBackwardFFTcounter("ddisp.bw.FFT"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + intToFloatProgram = this->createProgram("BeamFormer/IntToFloat.cl"); +#pragma omp section + delayAndBandPassProgram = this->createProgram("DelayAndBandPass.cl"); +#pragma omp section + beamFormerProgram = this->createProgram("BeamFormer/BeamFormer.cl"); +#pragma omp section + transposeProgram = this->createProgram("BeamFormer/Transpose.cl"); +#pragma omp section + dedispersionChirpProgram = this->createProgram("BeamFormer/Dedispersion.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +template <typename SampleType> UHEP_Pipeline<SampleType>::UHEP_Pipeline(const Parset &ps) +: + Pipeline<SampleType>(ps), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + invFFTcounter("inv. FFT"), + invFIRfilterCounter("inv. 
FIR"), + triggerCounter("trigger"), + beamFormerWeightsCounter("BF weights"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + beamFormerProgram = this->createProgram("UHEP/BeamFormer.cl"); +#pragma omp section + transposeProgram = this->createProgram("UHEP/Transpose.cl"); +#pragma omp section + invFFTprogram = this->createProgram("UHEP/InvFFT.cl"); +#pragma omp section + invFIRfilterProgram = this->createProgram("UHEP/InvFIR.cl"); +#pragma omp section + triggerProgram = this->createProgram("UHEP/Trigger.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +template <typename SampleType> class WorkQueue +{ + public: + WorkQueue(Pipeline<SampleType> &); + + const unsigned gpu; + cl::Device &device; + cl::CommandQueue queue; + + protected: + const Parset &ps; +}; + + +template <typename SampleType> class CorrelatorWorkQueue : public WorkQueue<SampleType> +{ + public: + CorrelatorWorkQueue(CorrelatorPipeline<SampleType> &); + + void doWork(); + +#if defined USE_TEST_DATA + void setTestPattern(); + void printTestOutput(); +#endif + + //private: + CorrelatorPipeline<SampleType> &pipeline; + cl::Buffer devFIRweights; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + MultiArraySharedBuffer<SampleType, 3> inputSamples; + + cl::Buffer devFilteredData; + cl::Buffer devCorrectedData; + + MultiArraySharedBuffer<std::complex<float>, 4> visibilities; +}; + + +template <typename SampleType> class BeamFormerWorkQueue : public WorkQueue<SampleType> +{ + public: + BeamFormerWorkQueue(BeamFormerPipeline<SampleType> &); + + void doWork(); + + BeamFormerPipeline<SampleType> &pipeline; + + MultiArraySharedBuffer<SampleType, 3> inputSamples; + cl::Buffer devFilteredData; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + 
MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + cl::Buffer devCorrectedData; + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights; + cl::Buffer devComplexVoltages; + MultiArraySharedBuffer<std::complex<float>, 4> transposedComplexVoltages; + MultiArraySharedBuffer<float, 1> DMs; +}; + + +struct TriggerInfo { + float mean, variance, bestValue; + unsigned bestApproxIndex; +}; + + +template <typename SampleType> class UHEP_WorkQueue : public WorkQueue<SampleType> +{ + public: + UHEP_WorkQueue(UHEP_Pipeline<SampleType> &); + + void doWork(const float *delaysAtBegin, const float *delaysAfterEnd, const float *phaseOffsets); + + UHEP_Pipeline<SampleType> &pipeline; + cl::Event inputSamplesEvent, beamFormerWeightsEvent; + + cl::Buffer devBuffers[2]; + cl::Buffer devInputSamples; + MultiArrayHostBuffer<SampleType, 4> hostInputSamples; + + cl::Buffer devBeamFormerWeights; + MultiArrayHostBuffer<std::complex<float>, 3> hostBeamFormerWeights; + + cl::Buffer devComplexVoltages; + cl::Buffer devReverseSubbandMapping; + cl::Buffer devFFTedData; + cl::Buffer devInvFIRfilteredData; + cl::Buffer devInvFIRfilterWeights; + + cl::Buffer devTriggerInfo; + VectorHostBuffer<TriggerInfo> hostTriggerInfo; +}; + + +class Kernel : public cl::Kernel +{ + public: + Kernel(const Parset &ps, cl::Program &program, const char *name) + : + cl::Kernel(program, name), + ps(ps) + { + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { + queue.enqueueNDRangeKernel(*this, cl::NullRange, globalWorkSize, localWorkSize, 0, &event); + counter.doOperation(event, nrOperations, nrBytesRead, nrBytesWritten); + } + + protected: + cl::Event event; + const Parset &ps; + cl::NDRange globalWorkSize, localWorkSize; + size_t nrOperations, nrBytesRead, nrBytesWritten; +}; + + +template <typename SampleType> class FIR_FilterKernel : public Kernel +{ + public: + FIR_FilterKernel(const Parset &ps, cl::CommandQueue 
&queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples, cl::Buffer &devFIRweights) + : + Kernel(ps, program, "FIR_filter") + { + setArg(0, devFilteredData); + setArg(1, devInputSamples); + setArg(2, devFIRweights); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned totalNrThreads = ps.nrChannelsPerSubband() * NR_POLARIZATIONS * 2; + unsigned nrPasses = (totalNrThreads + maxNrThreads - 1) / maxNrThreads; + globalWorkSize = cl::NDRange(totalNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(totalNrThreads / nrPasses, 1); + + size_t nrSamples = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * ps.nrSamplesPerChannel() * NR_TAPS * 2 * 2; + nrBytesRead = nrSamples * (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * sizeof(SampleType); + nrBytesWritten = nrSamples * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +class FFT_Kernel +{ + public: + FFT_Kernel(cl::Context &context, unsigned fftSize, unsigned nrFFTs, bool forward, cl::Buffer &buffer) + : + nrFFTs(nrFFTs), + fftSize(fftSize) +#if defined USE_CUSTOM_FFT + { + ASSERT(fftSize == 256); + ASSERT(forward); + std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); + cl::Program program = createProgram(context, devices, "FFT.cl", ""); + kernel = cl::Kernel(program, "fft0"); + kernel.setArg(0, buffer); + } +#else + , direction(forward ? 
clFFT_Forward : clFFT_Inverse), + plan(context, fftSize), + buffer(buffer) + { + } +#endif + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { +#if defined USE_CUSTOM_FFT + queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(nrFFTs * 64 / 4, 4), cl::NDRange(64, 4), 0, &event); +#else + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); +#endif + + counter.doOperation(event, + (size_t) nrFFTs * 5 * fftSize * log2(fftSize), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>)); + } + + private: + unsigned nrFFTs, fftSize; +#if defined USE_CUSTOM_FFT + cl::Kernel kernel; +#else + clFFT_Direction direction; + FFT_Plan plan; + cl::Buffer &buffer; +#endif + cl::Event event; +}; + + +class Filter_FFT_Kernel : public FFT_Kernel +{ + public: + Filter_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &devFilteredData) + : + FFT_Kernel(context, ps.nrChannelsPerSubband(), ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel(), true, devFilteredData) + { + } +}; + + +class DelayAndBandPassKernel : public Kernel +{ + public: + DelayAndBandPassKernel(const Parset &ps, cl::Program &program, cl::Buffer &devCorrectedData, cl::Buffer &devFilteredData, cl::Buffer &devDelaysAtBegin, cl::Buffer &devDelaysAfterEnd, cl::Buffer &devPhaseOffsets, cl::Buffer &devBandPassCorrectionWeights) + : + Kernel(ps, program, "applyDelaysAndCorrectBandPass") + { + ASSERT(ps.nrChannelsPerSubband() % 16 == 0 || ps.nrChannelsPerSubband() == 1); + ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + + setArg(0, devCorrectedData); + setArg(1, devFilteredData); + setArg(4, devDelaysAtBegin); + setArg(5, devDelaysAfterEnd); + setArg(6, devPhaseOffsets); + setArg(7, devBandPassCorrectionWeights); + + globalWorkSize = cl::NDRange(256, ps.nrChannelsPerSubband() 
== 1 ? 1 : ps.nrChannelsPerSubband() / 16, ps.nrStations()); + localWorkSize = cl::NDRange(256, 1, 1); + + size_t nrSamples = ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + nrOperations = nrSamples * 12; + nrBytesRead = nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, unsigned subband) + { + setArg(2, (float) ps.subbandToFrequencyMapping()[subband]); + setArg(3, 0); // beam + Kernel::enqueue(queue, counter); + } +}; + + +class CorrelatorKernel : public Kernel +{ + public: + CorrelatorKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devVisibilities, cl::Buffer &devCorrectedData) + : +#if defined USE_4X4 + Kernel(ps, program, "correlate_4x4") +#elif defined USE_3X3 + Kernel(ps, program, "correlate_3x3") +#elif defined USE_2X2 + Kernel(ps, program, "correlate_2x2") +#else + Kernel(ps, program, "correlate") +#endif + { + setArg(0, devVisibilities); + setArg(1, devCorrectedData); + + size_t maxNrThreads, preferredMultiple; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + + std::vector<cl_context_properties> properties; + queue.getInfo<CL_QUEUE_CONTEXT>().getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "AMD Accelerated Parallel Processing") + preferredMultiple = 256; + else + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &preferredMultiple); + +#if defined USE_4X4 + unsigned quartStations = (ps.nrStations() + 2) / 4; + unsigned nrBlocks = quartStations * (quartStations + 1) / 2; +#elif defined USE_3X3 + unsigned thirdStations = (ps.nrStations() + 2) / 3; + unsigned nrBlocks = thirdStations * (thirdStations + 1) / 2; +#elif defined USE_2X2 + unsigned halfStations = (ps.nrStations() + 1) / 2; + unsigned nrBlocks = halfStations * 
(halfStations + 1) / 2; +#else + unsigned nrBlocks = ps.nrBaselines(); +#endif + unsigned nrPasses = (nrBlocks + maxNrThreads - 1) / maxNrThreads; + unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses; + nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple; + //std::cout << "nrBlocks = " << nrBlocks << ", nrPasses = " << nrPasses << ", preferredMultiple = " << preferredMultiple << ", nrThreads = " << nrThreads << std::endl; + + globalWorkSize = cl::NDRange(nrPasses * nrThreads, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrThreads, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrBaselines() * ps.nrSamplesPerChannel() * 32; + nrBytesRead = (size_t) nrPasses * ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrBaselines() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * NR_POLARIZATIONS * sizeof(std::complex<float>); + } +}; + + +class IntToFloatKernel : public Kernel +{ + public: + IntToFloatKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "intToFloat") + { + setArg(0, devFilteredData); + setArg(1, devInputSamples); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + globalWorkSize = cl::NDRange(maxNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(maxNrThreads, 1); + + size_t nrSamples = ps.nrStations() * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * 2; + nrBytesRead = nrSamples * 2 * ps.nrBitsPerSample() / 8; + nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } +}; + + +class IncoherentStokesKernel : public Kernel +{ + public: + IncoherentStokesKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer 
&devIncoherentStokes, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "incoherentStokes") + { + setArg(0, devIncoherentStokes); + setArg(1, devInputSamples); + + unsigned nrTimes = ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor(); + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned nrPasses = (nrTimes + maxNrThreads - 1) / maxNrThreads; + unsigned nrTimesPerPass = (nrTimes + nrPasses - 1) / nrPasses; + globalWorkSize = cl::NDRange(nrTimesPerPass * nrPasses, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrTimesPerPass, 1); + + nrOperations = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrStations() * (ps.nrIncoherentStokes() == 1 ? 8 : 20 + 2.0 / ps.incoherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrIncoherentStokes() * nrTimes * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +class BeamFormerKernel : public Kernel +{ + public: + BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devCorrectedData, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devCorrectedData); + setArg(2, devBeamFormerWeights); + + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + // FIXME: nrTABs + //queue.enqueueNDRangeKernel(*this, cl::NullRange, cl::NDRange(16, ps.nrTABs(), ps.nrChannelsPerSubband()), cl::NDRange(16, ps.nrTABs(), 1), 0, &event); + + size_t count = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * 
sizeof(std::complex<float>); + size_t nrSampleBytesPerPass = count * ps.nrStations() * sizeof(std::complex<float>); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytesPerPass + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; + } +}; + + +class BeamFormerTransposeKernel : public Kernel +{ + public: + BeamFormerTransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTransposedData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "transposeComplexVoltages") + { + ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + setArg(0, devTransposedData); + setArg(1, devComplexVoltages); + + //globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, ps.nrSamplesPerChannel() / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>), + //nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +#if 0 +class Dedispersion_FFT_Kernel +{ + public: + Dedispersion_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + ps(ps), + plan(context, ps.dedispersionFFTsize()), + buffer(buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, clFFT_Direction direction) + { + size_t nrFFTs = (size_t) 
ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(); + + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); + + counter.doOperation(event, + nrFFTs * 5 * ps.dedispersionFFTsize() * log2(ps.dedispersionFFTsize()), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>)); + } + + private: + const Parset &ps; + FFT_Plan plan; + cl::Buffer &buffer; + cl::Event event; +}; +#else +class DedispersionForwardFFTkernel : public FFT_Kernel +{ + public: + DedispersionForwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), true, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; + + +class DedispersionBackwardFFTkernel : public FFT_Kernel +{ + public: + DedispersionBackwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), false, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; +#endif + + +class DedispersionChirpKernel : public Kernel +{ + public: + DedispersionChirpKernel(const Parset &ps, cl::Program &program, cl::CommandQueue &queue, cl::Buffer &buffer, cl::Buffer &DMs) + : + Kernel(ps, program, "applyChirp") + { + setArg(0, buffer); + setArg(1, DMs); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned fftSize = ps.dedispersionFFTsize(); + + globalWorkSize = 
cl::NDRange(fftSize, ps.nrSamplesPerChannel() / fftSize, ps.nrChannelsPerSubband()); + //std::cout << "globalWorkSize = NDRange(" << fftSize << ", " << ps.nrSamplesPerChannel() / fftSize << ", " << ps.nrChannelsPerSubband() << ')' << std::endl; + + if (fftSize <= maxNrThreads) { + localWorkSize = cl::NDRange(fftSize, 1, maxNrThreads / fftSize); + //std::cout << "localWorkSize = NDRange(" << fftSize << ", 1, " << maxNrThreads / fftSize << ')' << std::endl; + } else { + unsigned divisor; + + for (divisor = 1; fftSize / divisor > maxNrThreads || fftSize % divisor != 0; divisor ++) + ; + + localWorkSize = cl::NDRange(fftSize / divisor, 1, 1); + //std::cout << "localWorkSize = NDRange(" << fftSize / divisor << ", 1, 1))" << std::endl; + } + + nrOperations = (size_t) NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * (9 * ps.nrTABs() + 17), + nrBytesRead = nrBytesWritten = sizeof(std::complex<float>) * ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel(); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, double subbandFrequency) + { + setArg(2, (float) subbandFrequency); + Kernel::enqueue(queue, counter); + } +}; + + +class CoherentStokesKernel : public Kernel +{ + public: + CoherentStokesKernel(const Parset &ps, cl::Program &program, cl::Buffer &devStokesData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "coherentStokes") + { + ASSERT(ps.nrChannelsPerSubband() >= 16 && ps.nrChannelsPerSubband() % 16 == 0); + ASSERT(ps.nrCoherentStokes() == 1 || ps.nrCoherentStokes() == 4); + setArg(0, devStokesData); + setArg(1, devComplexVoltages); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * (ps.nrCoherentStokes() == 1 ? 
8 : 20 + 2.0 / ps.coherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * ps.nrCoherentStokes() * ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor() * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +template <typename SampleType> class UHEP_BeamFormerKernel : public Kernel +{ + public: + UHEP_BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devInputSamples, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devInputSamples); + setArg(2, devBeamFormerWeights); + +#if 1 + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrSubbands()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * sizeof(SampleType); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; +#else + ASSERT(ps.nrTABs() % 3 == 0); + ASSERT(ps.nrStations() % 6 == 0); + unsigned nrThreads = NR_POLARIZATIONS * (ps.nrTABs() / 3) * (ps.nrStations() / 6); + globalWorkSize = cl::NDRange(nrThreads, ps.nrSubbands()); + localWorkSize = cl::NDRange(nrThreads, 1); + //globalWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, ps.nrSubbands()); + //localWorkSize = 
cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * sizeof(SampleType); + size_t nrComplexVoltagesBytes = count * ps.nrTABs() * sizeof(std::complex<float>); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes; + nrBytesWritten = nrComplexVoltagesBytes; +#endif + } +}; + + +class UHEP_TransposeKernel : public Kernel +{ + public: + UHEP_TransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData, cl::Buffer &devComplexVoltages, cl::Buffer &devReverseSubbandMapping) + : + Kernel(ps, program, "UHEP_Transpose") + { + setArg(0, devFFTedData); + setArg(1, devComplexVoltages); + setArg(2, devReverseSubbandMapping); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, 512 / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + } +}; + + +class UHEP_InvFFT_Kernel : public Kernel +{ + public: + UHEP_InvFFT_Kernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData) + : + Kernel(ps, program, "inv_fft") + { + setArg(0, devFFTedData); + setArg(1, devFFTedData); + + globalWorkSize = cl::NDRange(128, ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel()); + localWorkSize = cl::NDRange(128, 1); + + size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1); + nrOperations = nrFFTs * 5 * 1024 * 10; + nrBytesRead = 
nrFFTs * 512 * sizeof(std::complex<float>); + nrBytesWritten = nrFFTs * 1024 * sizeof(float); + } +}; + + +class UHEP_InvFIR_Kernel : public Kernel +{ + public: + UHEP_InvFIR_Kernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devInvFIRfilteredData, cl::Buffer &devFFTedData, cl::Buffer &devInvFIRfilterWeights) + : + Kernel(ps, program, "invFIRfilter") + { + setArg(0, devInvFIRfilteredData); + setArg(1, devFFTedData); + setArg(2, devInvFIRfilterWeights); + + size_t maxNrThreads, nrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + // round down to nearest power of two + for (nrThreads = 1024; nrThreads > maxNrThreads; nrThreads /= 2) + ; + + globalWorkSize = cl::NDRange(1024, NR_POLARIZATIONS, ps.nrTABs()); + localWorkSize = cl::NDRange(nrThreads, 1, 1); + + size_t count = ps.nrTABs() * NR_POLARIZATIONS * 1024; + nrOperations = count * ps.nrSamplesPerChannel() * NR_STATION_FILTER_TAPS * 2; + nrBytesRead = count * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * sizeof(float); + nrBytesWritten = count * ps.nrSamplesPerChannel() * sizeof(float); + } +}; + + +class UHEP_TriggerKernel : public Kernel +{ + public: + UHEP_TriggerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTriggerInfo, cl::Buffer &devInvFIRfilteredData) + : + Kernel(ps, program, "trigger") + { + setArg(0, devTriggerInfo); + setArg(1, devInvFIRfilteredData); + + globalWorkSize = cl::NDRange(16, 16, ps.nrTABs()); + localWorkSize = cl::NDRange(16, 16, 1); + + nrOperations = (size_t) ps.nrTABs() * ps.nrSamplesPerChannel() * 1024 * (3 /* power */ + 2 /* window */ + 1 /* max */ + 7 /* mean/variance */); + nrBytesRead = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 1024 * sizeof(float); + nrBytesWritten = (size_t) ps.nrTABs() * sizeof(TriggerInfo); + } +}; + + +template <typename SampleType> WorkQueue<SampleType>::WorkQueue(Pipeline<SampleType> &pipeline) +: + 
gpu(omp_get_thread_num() % nrGPUs), + device(pipeline.devices[gpu]), + ps(pipeline.ps) +{ +#if defined __linux__ && defined USE_B7015 + set_affinity(gpu); +#endif + + queue = cl::CommandQueue(pipeline.context, device, profiling ? CL_QUEUE_PROFILING_ENABLE : 0); +} + + +template <typename SampleType> CorrelatorWorkQueue<SampleType>::CorrelatorWorkQueue(CorrelatorPipeline<SampleType> &pipeline) +: + WorkQueue<SampleType>(pipeline), + pipeline(pipeline), + bandPassCorrectionWeights(boost::extents[this->ps.nrChannelsPerSubband()], this->queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + inputSamples(boost::extents[ps.nrStations()][(ps.nrSamplesPerChannel() + NR_TAPS - 1) * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) +{ + size_t firWeightsSize = ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float); + devFIRweights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, firWeightsSize); + + size_t filteredDataSize = ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + devFilteredData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); + devCorrectedData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); + + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +template <typename SampleType> 
void CorrelatorWorkQueue<SampleType>::doWork() +{ + queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float), pipeline.filterBank.getWeights().origin()); + + FIR_FilterKernel<SampleType> firFilterKernel(ps, queue, pipeline.firFilterProgram, devFilteredData, inputSamples, devFIRweights); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + CorrelatorKernel correlatorKernel(ps, queue, pipeline.correlatorProgram, visibilities, devCorrectedData); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! 
+ if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if defined USE_TEST_DATA + if (subband == 0) + setTestPattern(); +#endif + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } + + if (ps.nrChannelsPerSubband() > 1) { + firFilterKernel.enqueue(queue, pipeline.firFilterCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + correlatorKernel.enqueue(queue, pipeline.correlatorCounter); + queue.finish(); + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.deviceToHostLock[gpu / 2]); +#endif + visibilities.deviceToHost(CL_TRUE); + pipeline.visibilitiesCounter.doOperation(visibilities.event, 0, visibilities.bytesize(), 0); + } + +#if defined USE_TEST_DATA + if (subband == 0) + printTestOutput(); +#endif + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +// complexVoltages() +// float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS][NR_POLARIZATIONS]; +// transpose() +// +// float2 (*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// FFT() +// +// applyChrip() +// +// FFT-1() +// float2 
(*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS]; +// computeStokes() +// float (*StokesType)[NR_TABS][NR_STOKES][NR_TIMES_PER_BLOCK / STOKES_INTEGRATION_SAMPLES][NR_CHANNELS]; + + +template <typename SampleType> BeamFormerWorkQueue<SampleType>::BeamFormerWorkQueue(BeamFormerPipeline<SampleType> &pipeline) +: + WorkQueue<SampleType>(pipeline), + pipeline(pipeline), + inputSamples(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devFilteredData(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>)), + bandPassCorrectionWeights(boost::extents[ps.nrChannelsPerSubband()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devCorrectedData(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devComplexVoltages(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + //transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE) 
+ transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE), + DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) +{ + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +template <typename SampleType> void BeamFormerWorkQueue<SampleType>::doWork() +{ + //queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, firWeightsSize, firFilterWeights); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + DMs.hostToDevice(CL_TRUE); + + IntToFloatKernel intToFloatKernel(ps, queue, pipeline.intToFloatProgram, devFilteredData, inputSamples); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + BeamFormerKernel beamFormerKernel(ps, pipeline.beamFormerProgram, devComplexVoltages, devCorrectedData, beamFormerWeights); + BeamFormerTransposeKernel transposeKernel(ps, pipeline.transposeProgram, transposedComplexVoltages, devComplexVoltages); + DedispersionForwardFFTkernel dedispersionForwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionBackwardFFTkernel dedispersionBackwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionChirpKernel dedispersionChirpKernel(ps, pipeline.dedispersionChirpProgram, queue, transposedComplexVoltages, DMs); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) 
+ std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! + if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if 1 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } +#endif + +//#pragma omp critical (GPU) +{ + if (ps.nrChannelsPerSubband() > 1) { + intToFloatKernel.enqueue(queue, pipeline.intToFloatCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + beamFormerKernel.enqueue(queue, pipeline.beamFormerCounter); + transposeKernel.enqueue(queue, pipeline.transposeCounter); + dedispersionForwardFFTkernel.enqueue(queue, pipeline.dedispersionForwardFFTcounter); + dedispersionChirpKernel.enqueue(queue, pipeline.dedispersionChirpCounter, ps.subbandToFrequencyMapping()[subband]); + dedispersionBackwardFFTkernel.enqueue(queue, pipeline.dedispersionBackwardFFTcounter); + + queue.finish(); +} + + //queue.enqueueReadBuffer(devComplexVoltages, CL_TRUE, 0, hostComplexVoltages.bytesize(), hostComplexVoltages.origin()); + //dedispersedData.deviceToHost(CL_TRUE); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << 
errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +template <typename SampleType> UHEP_WorkQueue<SampleType>::UHEP_WorkQueue(UHEP_Pipeline<SampleType> &pipeline) +: + WorkQueue<SampleType>(pipeline), + pipeline(pipeline), + hostInputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY), + hostBeamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY), + hostTriggerInfo(ps.nrTABs(), queue, CL_MEM_READ_ONLY) +{ + size_t inputSamplesSize = ps.nrStations() * ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS * sizeof(SampleType); + size_t complexVoltagesSize = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t transposedDataSize = ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + size_t invFIRfilteredDataSize = ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 512 * sizeof(std::complex<float>); + + size_t buffer0size = std::max(inputSamplesSize, transposedDataSize); + size_t buffer1size = std::max(complexVoltagesSize, invFIRfilteredDataSize); + + devBuffers[0] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer0size); + devBuffers[1] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer1size); + + size_t beamFormerWeightsSize = ps.nrStations() * ps.nrSubbands() * ps.nrTABs() * sizeof(std::complex<float>); + devBeamFormerWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, beamFormerWeightsSize); + + devInputSamples = devBuffers[0]; + devComplexVoltages = devBuffers[1]; + + 
devReverseSubbandMapping = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 512 * sizeof(int)); + devInvFIRfilterWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 1024 * NR_STATION_FILTER_TAPS * sizeof(float)); + devFFTedData = devBuffers[0]; + devInvFIRfilteredData = devBuffers[1]; + + devTriggerInfo = cl::Buffer(pipeline.context, CL_MEM_WRITE_ONLY, ps.nrTABs() * sizeof(TriggerInfo)); +} + + +template <typename SampleType> void UHEP_WorkQueue<SampleType>::doWork(const float * /*delaysAtBegin*/, const float * /*delaysAfterEnd*/, const float * /*phaseOffsets*/) +{ + UHEP_BeamFormerKernel<SampleType> beamFormer(ps, pipeline.beamFormerProgram, devComplexVoltages, devInputSamples, devBeamFormerWeights); + UHEP_TransposeKernel transpose(ps, pipeline.transposeProgram, devFFTedData, devComplexVoltages, devReverseSubbandMapping); + UHEP_InvFFT_Kernel invFFT(ps, pipeline.invFFTprogram, devFFTedData); + UHEP_InvFIR_Kernel invFIR(ps, queue, pipeline.invFIRfilterProgram, devInvFIRfilteredData, devFFTedData, devInvFIRfilterWeights); + UHEP_TriggerKernel trigger(ps, pipeline.triggerProgram, devTriggerInfo, devInvFIRfilteredData); + double startTime = ps.startTime(), stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + unsigned nrBlocks = (stopTime - startTime) / blockTime; + + queue.enqueueWriteBuffer(devInvFIRfilterWeights, CL_FALSE, 0, sizeof invertedStationPPFWeights, invertedStationPPFWeights); + queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_TRUE, 0, 512 * sizeof(int), reverseSubbandMapping); + +#pragma omp barrier + + double executionStartTime = getTime(); + +#pragma omp for schedule(dynamic) + for (unsigned block = 0; block < nrBlocks; block ++) { + try { + double currentTime = startTime + block * blockTime; + +//#pragma omp single // FIXME: why does the compiler complain here??? 
+#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + +#if 0 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + queue.enqueueWriteBuffer(devInputSamples, CL_TRUE, 0, sampledDataSize, hostInputSamples.origin(), 0, &samplesEvent); + } +#endif + + queue.enqueueWriteBuffer(devBeamFormerWeights, CL_FALSE, 0, hostBeamFormerWeights.bytesize(), hostBeamFormerWeights.origin(), 0, &beamFormerWeightsEvent); + pipeline.beamFormerWeightsCounter.doOperation(beamFormerWeightsEvent, 0, 0, hostBeamFormerWeights.bytesize()); + + queue.enqueueWriteBuffer(devInputSamples, CL_FALSE, 0, hostInputSamples.bytesize(), hostInputSamples.origin(), 0, &inputSamplesEvent); + pipeline.samplesCounter.doOperation(inputSamplesEvent, 0, 0, hostInputSamples.bytesize()); + + beamFormer.enqueue(queue, pipeline.beamFormerCounter); + transpose.enqueue(queue, pipeline.transposeCounter); + invFFT.enqueue(queue, pipeline.invFFTcounter); + invFIR.enqueue(queue, pipeline.invFIRfilterCounter); + trigger.enqueue(queue, pipeline.triggerCounter); + queue.finish(); // necessary to overlap I/O & computations ??? 
+ queue.enqueueReadBuffer(devTriggerInfo, CL_TRUE, 0, hostTriggerInfo.size() * sizeof(TriggerInfo), &hostTriggerInfo[0]); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +#if defined USE_TEST_DATA + +template <typename SampleType> void CorrelatorWorkQueue<SampleType>::setTestPattern() +{ + if (ps.nrStations() >= 3) { + double centerFrequency = 384 * ps.sampleRate(); + double baseFrequency = centerFrequency - .5 * ps.sampleRate(); + unsigned testSignalChannel = ps.nrChannelsPerSubband() >= 231 ? 230 : ps.nrChannelsPerSubband() / 2; + double signalFrequency = baseFrequency + testSignalChannel * ps.sampleRate() / ps.nrChannelsPerSubband(); + + for (unsigned time = 0; time < (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * ps.nrChannelsPerSubband(); time ++) { + double phi = 2.0 * M_PI * signalFrequency * time / ps.sampleRate(); + + switch (sizeof(SampleType)) { + case 4 : hostInputSamples[2][time][1] = SampleType((short) rint(32767 * cos(phi)), (short) rint(32767 * sin(phi))); + break; + + case 2 : hostInputSamples[2][time][1] = SampleType((signed char) rint(127 * cos(phi)), (signed char) rint(127 * sin(phi))); + break; + } + } + } +} + + +template <typename SampleType> void CorrelatorWorkQueue<SampleType>::printTestOutput() +{ + if (ps.nrBaselines() >= 6) +#pragma omp critical (cout) + { + std::cout << "newgraph newcurve linetype solid pts" << std::endl; + + //for (int channel = 0; channel < ps.nrChannelsPerSubband(); channel ++) + if (ps.nrChannelsPerSubband() == 256) + for (int channel = 228; channel <= 232; channel ++) + std::cout << channel << ' ' << hostVisibilities[5][channel][1][1] << std::endl; + } +} + +#endif + + +template <typenname SampleType> void 
CorrelatorPipeline<SampleType>::doWork() +{ +#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs) + try + { + switch (ps.nrBitsPerSample()) { + case 4 : CorrelatorWorkQueue<LOFAR::i4complex>(*this).doWork(); + break; + + case 8 : CorrelatorWorkQueue<std::complex<signed char> >(*this).doWork(); + break; + + case 16 : CorrelatorWorkQueue<std::complex<short> >(*this).doWork(); + break; + } + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +template <typenname SampleType> void BeamFormerPipeline<SampleType>::doWork() +{ +#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs) + try + { + switch (ps.nrBitsPerSample()) { + case 4 : BeamFormerWorkQueue<LOFAR::i4complex>(*this).doWork(); + break; + + case 8 : BeamFormerWorkQueue<std::complex<signed char> >(*this).doWork(); + break; + + case 16 : BeamFormerWorkQueue<std::complex<short> >(*this).doWork(); + break; + } + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +template <typenname SampleType> void UHEP_Pipeline<SampleType>::doWork() +{ + float delaysAtBegin[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + float delaysAfterEnd[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + float phaseOffsets[ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32))); + + memset(delaysAtBegin, 0, sizeof delaysAtBegin); + memset(delaysAfterEnd, 0, sizeof delaysAfterEnd); + memset(phaseOffsets, 0, sizeof phaseOffsets); + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + +#pragma omp parallel num_threads((profiling ? 
1 : 2) * nrGPUs) + try + { + switch (ps.nrBitsPerSample()) { + case 4 : UHEP_WorkQueue<LOFAR::i4complex>(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + + case 8 : UHEP_WorkQueue<std::complex<signed char> >(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + + case 16 : UHEP_WorkQueue<std::complex<short> >(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + } + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +class UnitTest +{ + protected: + UnitTest(const Parset &ps, const char *programName = 0) + : + counter(programName != 0 ? programName : "test") + { + createContext(context, devices); + queue = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE); + + if (programName != 0) + program = createProgram(ps, context, devices, programName); + } + + template <typename T> void check(T actual, T expected) + { + if (expected != actual) { + std::cerr << "Test FAILED: expected " << expected << ", computed " << actual << std::endl; + exit(1); + } else { + std::cout << "Test OK" << std::endl; + } + } + + cl::Context context; + std::vector<cl::Device> devices; + cl::Program program; + cl::CommandQueue queue; + + PerformanceCounter counter; +}; + + +struct CorrelatorTest : public UnitTest +{ + CorrelatorTest(const Parset &ps) + : + UnitTest(ps, "Correlator.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 6 && ps.nrSamplesPerChannel() >= 100) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> 
visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + CorrelatorKernel correlator(ps, queue, program, visibilities, inputData); + + inputData[3][5][99][1] = std::complex<float>(3, 4); + inputData[4][5][99][1] = std::complex<float>(5, 6); + + inputData.hostToDevice(CL_FALSE); + correlator.enqueue(queue, counter); + visibilities.deviceToHost(CL_TRUE); + + check(visibilities[13][5][1][1], std::complex<float>(39, 2)); + } + } +}; + + +struct IncoherentStokesTest : public UnitTest +{ + IncoherentStokesTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IncoherentStokes.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 14 && ps.nrSamplesPerChannel() >= 108) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<float, 3> stokesData(boost::extents[ps.nrIncoherentStokes()][ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + IncoherentStokesKernel kernel(ps, queue, program, stokesData, inputData); + + inputData[4][13][107][0] = std::complex<float>(2, 3); + inputData[4][13][107][1] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + stokesData.deviceToHost(CL_TRUE); + + const static float expected[] = { 54, -28, 46, 4 }; + + for (unsigned stokes = 0; stokes < ps.nrIncoherentStokes(); stokes ++) + check(stokesData[stokes][107 / ps.incoherentStokesTimeIntegrationFactor()][13], expected[stokes]); + } + } +}; + + +template <typename SampleType> struct IntToFloatTest : public UnitTest +{ + IntToFloatTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IntToFloat.cl") + { + if (ps.nrStations() >= 3 && ps.nrSamplesPerChannel() * 
ps.nrChannelsPerSubband() >= 10077) { + MultiArraySharedBuffer<SampleType, 3> inputData(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> outputData(boost::extents[ps.nrStations()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + IntToFloatKernel kernel(ps, queue, program, outputData, inputData); + + inputData[2][10076][1] = 7; + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + outputData.deviceToHost(CL_TRUE); + check(outputData[2][1][10076], std::complex<float>(7.0f, 0)); + } + } +}; + + +struct BeamFormerTest : public UnitTest +{ + BeamFormerTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/BeamFormer.cl") + { + if (ps.nrStations() >= 5 && ps.nrSamplesPerChannel() >= 13 && ps.nrChannelsPerSubband() >= 7 && ps.nrTABs() >= 6) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE); + BeamFormerKernel beamFormer(ps, program, complexVoltages, inputData, beamFormerWeights); + + inputData[4][6][12][1] = std::complex<float>(2.2, 3); + beamFormerWeights[4][6][5] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + beamFormer.enqueue(queue, counter); + complexVoltages.deviceToHost(CL_TRUE); + + check(complexVoltages[6][12][5][1], 
std::complex<float>(-6.2, 23)); + +#if 0 + for (unsigned tab = 0; tab < ps.nrTABs(); tab ++) + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) + for (unsigned ch = 0; ch < ps.nrChannelsPerSubband(); ch ++) + for (unsigned t = 0; t < ps.nrSamplesPerChannel(); t ++) + if (complexVoltages[tab][pol][ch][t] != std::complex<float>(0, 0)) + std::cout << "complexVoltages[" << tab << "][" << pol << "][" << ch << "][" << t << "] = " << complexVoltages[tab][pol][ch][t] << std::endl; +#endif + } + } +}; + + +struct BeamFormerTransposeTest : public UnitTest +{ + BeamFormerTransposeTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Transpose.cl") + { + if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + BeamFormerTransposeKernel transpose(ps, program, transposedData, complexVoltages); + + complexVoltages[18][174][4][1] = std::complex<float>(24, 42); + + complexVoltages.hostToDevice(CL_FALSE); + transpose.enqueue(queue, counter); + transposedData.deviceToHost(CL_TRUE); + + check(transposedData[4][1][174][18], std::complex<float>(24, 42)); + } + } +}; + + +struct DedispersionChirpTest : public UnitTest +{ + DedispersionChirpTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Dedispersion.cl") + { + if (ps.nrTABs() > 3 && ps.nrChannelsPerSubband() > 13 && ps.nrSamplesPerChannel() / ps.dedispersionFFTsize() > 1 && ps.dedispersionFFTsize() > 77) { + MultiArraySharedBuffer<std::complex<float>, 5> data(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel() / 
ps.dedispersionFFTsize()][ps.dedispersionFFTsize()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_WRITE); + MultiArraySharedBuffer<float, 1> DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + DedispersionChirpKernel dedispersionChirpKernel(ps, program, queue, data, DMs); + + data[3][1][13][1][77] = std::complex<float>(2, 3); + DMs[3] = 2; + + DMs.hostToDevice(CL_FALSE); + data.hostToDevice(CL_FALSE); + dedispersionChirpKernel.enqueue(queue, counter, 60e6); + data.deviceToHost(CL_TRUE); + + std::cout << data[3][1][13][1][77] << std::endl; + } + } +}; + + +struct CoherentStokesTest : public UnitTest +{ + CoherentStokesTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/CoherentStokes.cl") + { + if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<float, 4> stokesData(boost::extents[ps.nrTABs()][ps.nrCoherentStokes()][ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); +#if 1 + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages); + + complexVoltages[18][174][4][0] = std::complex<float>(2, 3); + complexVoltages[18][174][4][1] = std::complex<float>(4, 5); +#else + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages); + + complexVoltages[18][174][4][0] = std::complex<float>(2, 3); + complexVoltages[18][174][4][1] = std::complex<float>(4, 5); +#endif + + complexVoltages.hostToDevice(CL_FALSE); + stokesKernel.enqueue(queue, counter); + 
stokesData.deviceToHost(CL_TRUE); + + for (unsigned stokes = 0; stokes < ps.nrCoherentStokes(); stokes ++) + std::cout << stokesData[4][stokes][174 / ps.coherentStokesTimeIntegrationFactor()][18] << std::endl; + } + } +}; + + +template <typename SampleType> struct UHEP_BeamFormerTest : public UnitTest +{ + UHEP_BeamFormerTest(const Parset &ps) + : + UnitTest(ps, "UHEP/BeamFormer.cl") + { + if (ps.nrStations() >= 5 && (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) >= 13 && ps.nrSubbands() >= 7 && ps.nrTABs() >= 6) { + MultiArraySharedBuffer<SampleType, 4> inputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE); + UHEP_BeamFormerKernel<SampleType> beamFormer(ps, program, complexVoltages, inputSamples, beamFormerWeights); + + inputSamples[4][6][12][1] = SampleType(2, 3); + beamFormerWeights[4][6][5] = SampleType(4, 5); + + inputSamples.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + beamFormer.enqueue(queue, counter); + complexVoltages.deviceToHost(CL_TRUE); + + check(complexVoltages[6][12][5][1], std::complex<float>(-7, 22)); + } + } +}; + + +struct UHEP_TransposeTest : public UnitTest +{ + UHEP_TransposeTest(const Parset &ps) + : + UnitTest(ps, "UHEP/Transpose.cl") + { + if (ps.nrSubbands() >= 19 && ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() + 
NR_STATION_FILTER_TAPS - 1][512], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + cl::Buffer devReverseSubbandMapping(context, CL_MEM_READ_ONLY, 512 * sizeof(int)); + UHEP_TransposeKernel transpose(ps, program, transposedData, complexVoltages, devReverseSubbandMapping); + + complexVoltages[18][174][4][1] = std::complex<float>(24, 42); + + queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_FALSE, 0, 512 * sizeof(int), reverseSubbandMapping); + complexVoltages.hostToDevice(CL_FALSE); + transpose.enqueue(queue, counter); + transposedData.deviceToHost(CL_TRUE); + + check(transposedData[4][1][174][38], std::complex<float>(24, 42)); + } + } +}; + + +struct UHEP_TriggerTest : public UnitTest +{ + UHEP_TriggerTest(const Parset &ps) + : + UnitTest(ps, "UHEP/Trigger.cl") + { + if (ps.nrTABs() >= 4 && 1024 * ps.nrSamplesPerChannel() > 100015) { + MultiArraySharedBuffer<float, 3> inputData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * 1024], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<TriggerInfo, 1> triggerInfo(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + UHEP_TriggerKernel trigger(ps, program, triggerInfo, inputData); + + inputData[3][1][100015] = 1000; + + inputData.hostToDevice(CL_FALSE); + trigger.enqueue(queue, counter); + triggerInfo.deviceToHost(CL_TRUE); + + std::cout << "trigger info: mean = " << triggerInfo[3].mean << ", variance = " << triggerInfo[3].variance << ", bestValue = " << triggerInfo[3].bestValue << ", bestApproxIndex = " << triggerInfo[3].bestApproxIndex << std::endl; + //check(triggerInfo[3].mean, (float) (1000.0f * 1000.0f) / (float) (ps.nrSamplesPerChannel() * 1024)); + check(triggerInfo[3].bestValue, 1000.0f * 1000.0f); + 
check(triggerInfo[3].bestApproxIndex, 100016U); + } + } +}; + + +#if 0 +struct FFT_Test : public UnitTest +{ + FFT_Test(const Parset &ps) + : UnitTest(ps, "fft.cl") + { + MultiArraySharedBuffer<std::complex<float>, 1> in(boost::extents[8], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 1> out(boost::extents[8], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + + for (unsigned i = 0; i < 8; i ++) + in[i] = std::complex<float>(2 * i + 1, 2 * i + 2); + + clAmdFftSetupData setupData; + cl::detail::errHandler(clAmdFftInitSetupData(&setupData), "clAmdFftInitSetupData"); + setupData.debugFlags = CLFFT_DUMP_PROGRAMS; + cl::detail::errHandler(clAmdFftSetup(&setupData), "clAmdFftSetup"); + + clAmdFftPlanHandle plan; + size_t dim[1] = { 8 }; + + cl::detail::errHandler(clAmdFftCreateDefaultPlan(&plan, context(), CLFFT_1D, dim), "clAmdFftCreateDefaultPlan"); + cl::detail::errHandler(clAmdFftSetResultLocation(plan, CLFFT_OUTOFPLACE), "clAmdFftSetResultLocation"); + cl::detail::errHandler(clAmdFftSetPlanBatchSize(plan, 1), "clAmdFftSetPlanBatchSize"); + cl::detail::errHandler(clAmdFftBakePlan(plan, 1, &queue(), 0, 0), "clAmdFftBakePlan"); + + in.hostToDevice(CL_FALSE); + cl_mem ins[1] = { ((cl::Buffer) in)() }; + cl_mem outs[1] = { ((cl::Buffer) out)() }; +#if 1 + cl::detail::errHandler(clAmdFftEnqueueTransform(plan, CLFFT_FORWARD, 1, &queue(), 0, 0, 0, ins, outs, 0), "clAmdFftEnqueueTransform"); +#else + cl::Kernel kernel(program, "fft_fwd"); + kernel.setArg(0, (cl::Buffer) in); + kernel.setArg(1, (cl::Buffer) out); + queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(64, 1, 1), cl::NDRange(64, 1, 1)); +#endif + out.deviceToHost(CL_TRUE); + + for (unsigned i = 0; i < 8; i ++) + std::cout << out[i] << std::endl; + + cl::detail::errHandler(clAmdFftDestroyPlan(&plan), "clAmdFftDestroyPlan"); + cl::detail::errHandler(clAmdFftTeardown(), "clAmdFftTeardown"); + } +}; +#endif + + +} // namespace RTCP +} // namespace LOFAR + +int 
main(int argc, char **argv) +{ + using namespace LOFAR::RTCP; + + std::cout << "running ..." << std::endl; + + if (setenv("DISPLAY", ":0.0", 1) < 0) { + perror("error setting DISPLAY"); + exit(1); + } + + if (argc != 2) { + std::cerr << "usage: " << argv[0] << " parset" << std::endl; + exit(1); + } + +#if 0 && defined __linux__ + set_affinity(0); +#endif + + try { + Parset ps(argv[1]); + +#if 0 + const char *str = getenv("NR_STATIONS"); + ps.nrStations() = str ? atoi(str) : 77; +#endif + std::cout << "nr stations = " << ps.nrStations() << std::endl; + + const char *str = getenv("NR_GPUS"); + nrGPUs = str ? atoi(str) : 1; + +#if 0 + ps.nrSubbands() = 10;//488; + ps.nrChannelsPerSubband() = 64; + ps.nrBeams() = 1; + ps.nrSamplesPerChannel() = 196608 / ps.nrChannelsPerSubband(); + ps.subbandBandwidth() = 195312.5; + ps.correctBandPass() = true; +#endif + + profiling = false; CorrelatorPipeline(ps).doWork(); + profiling = true; CorrelatorPipeline(ps).doWork(); + + //(CorrelatorTest)(ps); + +#if 0 + ps.nrSubbands() = 488; + ps.nrChannelsPerSubband() = 2048; + ps.nrBeams() = 1; + ps.nrTABs() = 128; + ps.nrIncoherentStokes() = 4; + ps.nrCoherentStokes() = 4; + ps.incoherentStokesTimeIntegrationFactor() = 8; + ps.coherentStokesTimeIntegrationFactor() = 8; + ps.nrSamplesPerChannel() = 65536 / ps.nrChannelsPerSubband();//262144 / ps.nrChannelsPerSubband(); + ps.subbandBandwidth() = 195312.5; + ps.correctBandPass() = true; + ps.dedispersionFFTsize() = ps.nrSamplesPerChannel(); + + profiling = false; BeamFormerPipeline(ps).doWork(); + profiling = true; BeamFormerPipeline(ps).doWork(); + //(IncoherentStokesTest)(ps); + //(IntToFloatTest)(ps); + //(BeamFormerTest)(ps); + //(BeamFormerTransposeTest)(ps); + //(DedispersionChirpTest)(ps); + //(CoherentStokesTest)(ps); +#endif + +#if 0 + ps.nrSubbands() = 488; + ps.nrSamplesPerChannel() = 1024; + ps.nrBeams() = 1; + ps.subbandBandwidth() = 195312.5; + ps.nrTABs() = 48; + + profiling = false; UHEP_Pipeline(ps).doWork(); + profiling = 
true; UHEP_Pipeline(ps).doWork(); + //(UHEP_BeamFormerTest)(ps); + //(UHEP_TransposeTest)(ps); + //(UHEP_TriggerTest)(ps); +#endif + +#if 0 + (FFT_Test)(ps); +#endif + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + + return 0; +} diff --git a/RTCP/GPUProc/src/RTCP.cc.ok b/RTCP/GPUProc/src/RTCP.cc.ok new file mode 100644 index 0000000000000000000000000000000000000000..da22af7c949fec47f566332d5d056abd02c98b69 --- /dev/null +++ b/RTCP/GPUProc/src/RTCP.cc.ok @@ -0,0 +1,2018 @@ +#include "lofar_config.h" + +#define __CL_ENABLE_EXCEPTIONS +#include <CL/cl.hpp> + +#include <omp.h> + +#include <cmath> +#include <complex> +#include <cstdio> +#include <cstdlib> +#include <fstream> +#include <iomanip> +#include <iostream> +#include <sstream> +#include <boost/multi_array.hpp> + +#include "Align.h" +#include "BandPass.h" +#include "Common.h" +#include "FilterBank.h" +#include "Common/lofar_datetime.h" +#include "Common/LofarLogger.h" +#include "Common/SystemUtil.h" +#include "Interface/Parset.h" +#include "OpenCL_FFT/clFFT.h" +#include "OpenCL_Support.h" +#include "OpenMP_Support.h" +#include "UHEP/InvertedStationPPFWeights.h" +//#include "clAmdFft/include/clAmdFft.h" + +namespace LOFAR { +namespace RTCP { + +bool profiling = true; +unsigned nrGPUs; + +//#define NR_BITS_PER_SAMPLE 8 +#define NR_POLARIZATIONS 2 +#define NR_TAPS 16 +#define NR_STATION_FILTER_TAPS 16 + +#define USE_2X2 +#undef USE_CUSTOM_FFT +#undef USE_TEST_DATA +#undef USE_B7015 + +#if 0 +#if NR_BITS_PER_SAMPLE == 16 +typedef std::complex<short> SampleType; +#elif NR_BITS_PER_SAMPLE == 8 +typedef std::complex<signed char> SampleType; +#endif +#endif + + +#if 0 + +class Parset +{ + public: + unsigned nrStations() const { return _nrStations; } + unsigned nrBaselines() const { return nrStations() * (nrStations() + 1) / 2; } + unsigned nrSubbands() const { return _nrSubbands; } + 
float subbandBandwidth() const { return _subbandBandwidth; } + unsigned nrChannelsPerSubband() const { return _nrChannelsPerSubband; } + unsigned nrBeams() const { return _nrBeams; } + unsigned nrSamplesPerChannel() const { return _nrSamplesPerChannel; } + bool correctBandPass() const { return _correctBandPass; } + unsigned nrTABs() const { return _nrTABs; } + unsigned nrCoherentStokes() const { return _nrCoherentStokes; } + unsigned nrIncoherentStokes() const { return _nrIncoherentStokes; } + unsigned coherentStokesTimeIntegrationFactor() const { return _coherentStokesTimeIntegrationFactor; } + unsigned incoherentStokesTimeIntegrationFactor() const { return _incoherentStokesTimeIntegrationFactor; } + unsigned dedispersionFFTsize() const { return _dedispersionFFTsize; } + + unsigned &nrStations() { return _nrStations; } + unsigned &nrSubbands() { return _nrSubbands; } + float &subbandBandwidth() { return _subbandBandwidth; } + unsigned &nrChannelsPerSubband() { return _nrChannelsPerSubband; } + unsigned &nrBeams() { return _nrBeams; } + unsigned &nrSamplesPerChannel() { return _nrSamplesPerChannel; } + bool &correctBandPass() { return _correctBandPass; } + unsigned &nrTABs() { return _nrTABs; } + unsigned &nrCoherentStokes() { return _nrCoherentStokes; } + unsigned &nrIncoherentStokes() { return _nrIncoherentStokes; } + unsigned &coherentStokesTimeIntegrationFactor() { return _coherentStokesTimeIntegrationFactor; } + unsigned &incoherentStokesTimeIntegrationFactor() { return _incoherentStokesTimeIntegrationFactor; } + unsigned &dedispersionFFTsize() { return _dedispersionFFTsize; } + + private: + unsigned _nrStations; + unsigned _nrSubbands; + float _subbandBandwidth; + unsigned _nrChannelsPerSubband; + unsigned _nrBeams; + unsigned _nrSamplesPerChannel; + bool _correctBandPass; + unsigned _nrTABs; + unsigned _nrCoherentStokes; + unsigned _nrIncoherentStokes; + unsigned _coherentStokesTimeIntegrationFactor; + unsigned _incoherentStokesTimeIntegrationFactor; + 
unsigned _dedispersionFFTsize; +}; + +#endif + + +class PerformanceCounter +{ + public: + PerformanceCounter(const std::string &name); + ~PerformanceCounter(); + + void doOperation(cl::Event &, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten); + + private: + static void eventCompleteCallBack(cl_event, cl_int /*status*/, void *counter); + + size_t totalNrOperations, totalNrBytesRead, totalNrBytesWritten; + double totalTime; + unsigned totalEvents; + const std::string name; +}; + + +PerformanceCounter::PerformanceCounter(const std::string &name) +: + totalNrOperations(0), + totalNrBytesRead(0), + totalNrBytesWritten(0), + totalTime(0), + totalEvents(0), + name(name) +{ +} + + +PerformanceCounter::~PerformanceCounter() +{ + if (totalTime > 0) +#pragma omp critical (cout) + std::cout << std::setw(12) << name + << std::setprecision(3) + << ": avg. time = " << 1000 * totalTime / totalEvents << " ms, " + "GFLOP/s = " << totalNrOperations / totalTime / 1e9 << ", " + "R/W = " << totalNrBytesRead / totalTime / 1e9 << '+' + << totalNrBytesWritten / totalTime / 1e9 << '=' + << (totalNrBytesRead + totalNrBytesWritten) / totalTime / 1e9 << " GB/s" + << std::endl; +} + + +void PerformanceCounter::eventCompleteCallBack(cl_event ev, cl_int /*status*/, void *counter) +{ + cl::Event event(ev); + + size_t queued, submitted, start, stop; + event.getProfilingInfo(CL_PROFILING_COMMAND_QUEUED, &queued); + event.getProfilingInfo(CL_PROFILING_COMMAND_SUBMIT, &submitted); + event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start); + event.getProfilingInfo(CL_PROFILING_COMMAND_END, &stop); + double seconds = (stop - start) / 1e9; + + if (seconds < 0 || seconds > 15) +#pragma omp critical (cout) + std::cout << "BAH! 
" << omp_get_thread_num() << ": " << queued << ' ' << submitted - queued << ' ' << start - queued << ' ' << stop - queued << std::endl; + +#pragma omp atomic + static_cast<PerformanceCounter *>(counter)->totalTime += seconds; + + // cl::~Event() decreases ref count +} + + +void PerformanceCounter::doOperation(cl::Event &event, size_t nrOperations, size_t nrBytesRead, size_t nrBytesWritten) +{ + // reference count between C and C++ conversions is serously broken in C++ wrapper + cl_event ev = event(); + cl_int error = clRetainEvent(ev); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clRetainEvent"); + + if (profiling) { + event.setCallback(CL_COMPLETE, &PerformanceCounter::eventCompleteCallBack, this); + +#pragma omp atomic + totalNrOperations += nrOperations; +#pragma omp atomic + totalNrBytesRead += nrBytesRead; +#pragma omp atomic + totalNrBytesWritten += nrBytesWritten; +#pragma omp atomic + ++ totalEvents; + } +} + + +cl::Program createProgram(const Parset &ps, cl::Context &context, std::vector<cl::Device> &devices, const char *sources) +{ + std::stringstream args; + args << "-cl-fast-relaxed-math"; + + std::vector<cl_context_properties> properties; + context.getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "NVIDIA CUDA") { + args << " -cl-nv-verbose"; + args << " -cl-nv-opt-level=99"; + //args << " -cl-nv-maxrregcount=63"; + args << " -DNVIDIA_CUDA"; + } + + //if (devices[0].getInfo<CL_DEVICE_NAME>() == "GeForce GTX 680") + //args << " -DUSE_FLOAT4_IN_CORRELATOR"; + + args << " -I" << dirname(__FILE__); + args << " -DNR_BITS_PER_SAMPLE=" << ps.nrBitsPerSample(); + args << " -DSUBBAND_BANDWIDTH=" << std::setprecision(7) << ps.subbandBandwidth() << 'f'; + args << " -DNR_SUBBANDS=" << ps.nrSubbands(); + args << " -DNR_CHANNELS=" << ps.nrChannelsPerSubband(); + args << " -DNR_STATIONS=" << ps.nrStations(); + args << " -DNR_SAMPLES_PER_CHANNEL=" << ps.nrSamplesPerChannel(); + 
args << " -DNR_SAMPLES_PER_SUBBAND=" << ps.nrSamplesPerSubband(); + args << " -DNR_BEAMS=" << ps.nrBeams(); + args << " -DNR_TABS=" << ps.nrTABs(); + args << " -DNR_COHERENT_STOKES=" << ps.nrCoherentStokes(); + args << " -DNR_INCOHERENT_STOKES=" << ps.nrIncoherentStokes(); + args << " -DCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.coherentStokesTimeIntegrationFactor(); + args << " -DINCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.incoherentStokesTimeIntegrationFactor(); + args << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS; + args << " -DNR_TAPS=" << NR_TAPS; + args << " -DNR_STATION_FILTER_TAPS=" << NR_STATION_FILTER_TAPS; + + if (ps.delayCompensation()) + args << " -DDELAY_COMPENSATION"; + + if (ps.correctBandPass()) + args << " -DBANDPASS_CORRECTION"; + + args << " -DDEDISPERSION_FFT_SIZE=" << ps.dedispersionFFTsize(); + return createProgram(context, devices, sources, args.str().c_str()); +} + + +class FFT_Plan +{ + public: + FFT_Plan(cl::Context &context, unsigned fftSize) + { + clFFT_Dim3 dim = { fftSize, 1, 1 }; + cl_int error; + plan = clFFT_CreatePlan(context(), dim, clFFT_1D, clFFT_InterleavedComplexFormat, &error); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_CreatePlan"); + + //clFFT_DumpPlan(plan, stdout); + } + + ~FFT_Plan() + { + clFFT_DestroyPlan(plan); + } + + clFFT_Plan plan; +}; + + +class Pipeline +{ + public: + Pipeline(const Parset &); + + cl::Program createProgram(const char *sources); + + cl::Context context; + std::vector<cl::Device> devices; + const Parset &ps; +}; + + +class CorrelatorPipeline : public Pipeline +{ + public: + CorrelatorPipeline(const Parset &); + + void doWork(); + + //private: + //friend class CorrelatorWorkQueue; + + FilterBank filterBank; + + cl::Program firFilterProgram, delayAndBandPassProgram, correlatorProgram; + PerformanceCounter firFilterCounter, delayAndBandPassCounter, correlatorCounter, fftCounter; + PerformanceCounter samplesCounter, visibilitiesCounter; + +#if defined USE_B7015 + OMP_Lock 
hostToDeviceLock[4], deviceToHostLock[4]; +#endif +}; + + +class BeamFormerPipeline : public Pipeline +{ + public: + BeamFormerPipeline(const Parset &); + + void doWork(); + + cl::Program intToFloatProgram, delayAndBandPassProgram, beamFormerProgram, transposeProgram, dedispersionChirpProgram; + + PerformanceCounter intToFloatCounter, fftCounter, delayAndBandPassCounter, beamFormerCounter, transposeCounter, dedispersionForwardFFTcounter, dedispersionChirpCounter, dedispersionBackwardFFTcounter; + PerformanceCounter samplesCounter; +}; + + +class UHEP_Pipeline : public Pipeline +{ + public: + UHEP_Pipeline(const Parset &); + + void doWork(); + + cl::Program beamFormerProgram, transposeProgram, invFFTprogram, invFIRfilterProgram, triggerProgram; + PerformanceCounter beamFormerCounter, transposeCounter, invFFTcounter, invFIRfilterCounter, triggerCounter; + PerformanceCounter beamFormerWeightsCounter, samplesCounter; +}; + + +Pipeline::Pipeline(const Parset &ps) +: + ps(ps) +{ + createContext(context, devices); +} + + +cl::Program Pipeline::createProgram(const char *sources) +{ + return LOFAR::RTCP::createProgram(ps, context, devices, sources); +} + + +CorrelatorPipeline::CorrelatorPipeline(const Parset &ps) +: + Pipeline(ps), + filterBank(true, NR_TAPS, ps.nrChannelsPerSubband(), KAISER), + firFilterCounter("FIR filter"), + delayAndBandPassCounter("delay/bp"), + correlatorCounter("correlator"), + fftCounter("FFT"), + samplesCounter("samples"), + visibilitiesCounter("visibilities") +{ + filterBank.negateWeights(); + + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + firFilterProgram = createProgram("FIR.cl"); +#pragma omp section + delayAndBandPassProgram = createProgram("DelayAndBandPass.cl"); +#pragma omp section + correlatorProgram = createProgram("Correlator.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +BeamFormerPipeline::BeamFormerPipeline(const Parset &ps) +: + Pipeline(ps), + 
intToFloatCounter("int-to-float"), + fftCounter("FFT"), + delayAndBandPassCounter("delay/bp"), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + dedispersionForwardFFTcounter("ddisp.fw.FFT"), + dedispersionChirpCounter("chirp"), + dedispersionBackwardFFTcounter("ddisp.bw.FFT"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + intToFloatProgram = createProgram("BeamFormer/IntToFloat.cl"); +#pragma omp section + delayAndBandPassProgram = createProgram("DelayAndBandPass.cl"); +#pragma omp section + beamFormerProgram = createProgram("BeamFormer/BeamFormer.cl"); +#pragma omp section + transposeProgram = createProgram("BeamFormer/Transpose.cl"); +#pragma omp section + dedispersionChirpProgram = createProgram("BeamFormer/Dedispersion.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +UHEP_Pipeline::UHEP_Pipeline(const Parset &ps) +: + Pipeline(ps), + beamFormerCounter("beamformer"), + transposeCounter("transpose"), + invFFTcounter("inv. FFT"), + invFIRfilterCounter("inv. 
FIR"), + triggerCounter("trigger"), + beamFormerWeightsCounter("BF weights"), + samplesCounter("samples") +{ + double startTime = getTime(); + +#pragma omp parallel sections + { +#pragma omp section + beamFormerProgram = createProgram("UHEP/BeamFormer.cl"); +#pragma omp section + transposeProgram = createProgram("UHEP/Transpose.cl"); +#pragma omp section + invFFTprogram = createProgram("UHEP/InvFFT.cl"); +#pragma omp section + invFIRfilterProgram = createProgram("UHEP/InvFIR.cl"); +#pragma omp section + triggerProgram = createProgram("UHEP/Trigger.cl"); + } + + std::cout << "compile time = " << getTime() - startTime << std::endl; +} + + +class WorkQueue +{ + public: + WorkQueue(Pipeline &); + + const unsigned gpu; + cl::Device &device; + cl::CommandQueue queue; + + protected: + const Parset &ps; +}; + + +template <typename SampleType> class CorrelatorWorkQueue : public WorkQueue +{ + public: + CorrelatorWorkQueue(CorrelatorPipeline &); + + void doWork(); + +#if defined USE_TEST_DATA + void setTestPattern(); + void printTestOutput(); +#endif + + //private: + CorrelatorPipeline &pipeline; + cl::Buffer devFIRweights; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + MultiArraySharedBuffer<SampleType, 3> inputSamples; + + cl::Buffer devFilteredData; + cl::Buffer devCorrectedData; + + MultiArraySharedBuffer<std::complex<float>, 4> visibilities; +}; + + +template <typename SampleType> class BeamFormerWorkQueue : public WorkQueue +{ + public: + BeamFormerWorkQueue(BeamFormerPipeline &); + + void doWork(); + + BeamFormerPipeline &pipeline; + + MultiArraySharedBuffer<SampleType, 3> inputSamples; + cl::Buffer devFilteredData; + MultiArraySharedBuffer<float, 1> bandPassCorrectionWeights; + MultiArraySharedBuffer<float, 3> delaysAtBegin, delaysAfterEnd; + MultiArraySharedBuffer<float, 2> phaseOffsets; + cl::Buffer devCorrectedData; + 
MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights; + cl::Buffer devComplexVoltages; + MultiArraySharedBuffer<std::complex<float>, 4> transposedComplexVoltages; + MultiArraySharedBuffer<float, 1> DMs; +}; + + +struct TriggerInfo { + float mean, variance, bestValue; + unsigned bestApproxIndex; +}; + +template <typename SampleType> class UHEP_WorkQueue : public WorkQueue +{ + public: + UHEP_WorkQueue(UHEP_Pipeline &); + + void doWork(const float *delaysAtBegin, const float *delaysAfterEnd, const float *phaseOffsets); + + UHEP_Pipeline &pipeline; + cl::Event inputSamplesEvent, beamFormerWeightsEvent; + + cl::Buffer devBuffers[2]; + cl::Buffer devInputSamples; + MultiArrayHostBuffer<SampleType, 4> hostInputSamples; + + cl::Buffer devBeamFormerWeights; + MultiArrayHostBuffer<std::complex<float>, 3> hostBeamFormerWeights; + + cl::Buffer devComplexVoltages; + cl::Buffer devReverseSubbandMapping; + cl::Buffer devFFTedData; + cl::Buffer devInvFIRfilteredData; + cl::Buffer devInvFIRfilterWeights; + + cl::Buffer devTriggerInfo; + VectorHostBuffer<TriggerInfo> hostTriggerInfo; +}; + + +class Kernel : public cl::Kernel +{ + public: + Kernel(const Parset &ps, cl::Program &program, const char *name) + : + cl::Kernel(program, name), + ps(ps) + { + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { + queue.enqueueNDRangeKernel(*this, cl::NullRange, globalWorkSize, localWorkSize, 0, &event); + counter.doOperation(event, nrOperations, nrBytesRead, nrBytesWritten); + } + + protected: + cl::Event event; + const Parset &ps; + cl::NDRange globalWorkSize, localWorkSize; + size_t nrOperations, nrBytesRead, nrBytesWritten; +}; + + +template <typename SampleType> class FIR_FilterKernel : public Kernel +{ + public: + FIR_FilterKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples, cl::Buffer &devFIRweights) + : + Kernel(ps, program, "FIR_filter") + { + setArg(0, devFilteredData); 
+ setArg(1, devInputSamples); + setArg(2, devFIRweights); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned totalNrThreads = ps.nrChannelsPerSubband() * NR_POLARIZATIONS * 2; + unsigned nrPasses = (totalNrThreads + maxNrThreads - 1) / maxNrThreads; + globalWorkSize = cl::NDRange(totalNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(totalNrThreads / nrPasses, 1); + + size_t nrSamples = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * ps.nrSamplesPerChannel() * NR_TAPS * 2 * 2; + nrBytesRead = nrSamples * (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * sizeof(SampleType); + nrBytesWritten = nrSamples * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +class FFT_Kernel +{ + public: + FFT_Kernel(cl::Context &context, unsigned fftSize, unsigned nrFFTs, bool forward, cl::Buffer &buffer) + : + nrFFTs(nrFFTs), + fftSize(fftSize) +#if defined USE_CUSTOM_FFT + { + ASSERT(fftSize == 256); + ASSERT(forward); + std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>(); + cl::Program program = createProgram(context, devices, "FFT.cl", ""); + kernel = cl::Kernel(program, "fft0"); + kernel.setArg(0, buffer); + } +#else + , direction(forward ? 
clFFT_Forward : clFFT_Inverse), + plan(context, fftSize), + buffer(buffer) + { + } +#endif + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter) + { +#if defined USE_CUSTOM_FFT + queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(nrFFTs * 64 / 4, 4), cl::NDRange(64, 4), 0, &event); +#else + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); +#endif + + counter.doOperation(event, + (size_t) nrFFTs * 5 * fftSize * log2(fftSize), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>), + (size_t) nrFFTs * fftSize * sizeof(std::complex<float>)); + } + + private: + unsigned nrFFTs, fftSize; +#if defined USE_CUSTOM_FFT + cl::Kernel kernel; +#else + clFFT_Direction direction; + FFT_Plan plan; + cl::Buffer &buffer; +#endif + cl::Event event; +}; + + +class Filter_FFT_Kernel : public FFT_Kernel +{ + public: + Filter_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &devFilteredData) + : + FFT_Kernel(context, ps.nrChannelsPerSubband(), ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel(), true, devFilteredData) + { + } +}; + + +class DelayAndBandPassKernel : public Kernel +{ + public: + DelayAndBandPassKernel(const Parset &ps, cl::Program &program, cl::Buffer &devCorrectedData, cl::Buffer &devFilteredData, cl::Buffer &devDelaysAtBegin, cl::Buffer &devDelaysAfterEnd, cl::Buffer &devPhaseOffsets, cl::Buffer &devBandPassCorrectionWeights) + : + Kernel(ps, program, "applyDelaysAndCorrectBandPass") + { + ASSERT(ps.nrChannelsPerSubband() % 16 == 0 || ps.nrChannelsPerSubband() == 1); + ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + + setArg(0, devCorrectedData); + setArg(1, devFilteredData); + setArg(4, devDelaysAtBegin); + setArg(5, devDelaysAfterEnd); + setArg(6, devPhaseOffsets); + setArg(7, devBandPassCorrectionWeights); + + globalWorkSize = cl::NDRange(256, ps.nrChannelsPerSubband() 
== 1 ? 1 : ps.nrChannelsPerSubband() / 16, ps.nrStations()); + localWorkSize = cl::NDRange(256, 1, 1); + + size_t nrSamples = ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + nrOperations = nrSamples * 12; + nrBytesRead = nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, unsigned subband) + { + setArg(2, (float) ps.subbandToFrequencyMapping()[subband]); + setArg(3, 0); // beam + Kernel::enqueue(queue, counter); + } +}; + + +class CorrelatorKernel : public Kernel +{ + public: + CorrelatorKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devVisibilities, cl::Buffer &devCorrectedData) + : +#if defined USE_4X4 + Kernel(ps, program, "correlate_4x4") +#elif defined USE_3X3 + Kernel(ps, program, "correlate_3x3") +#elif defined USE_2X2 + Kernel(ps, program, "correlate_2x2") +#else + Kernel(ps, program, "correlate") +#endif + { + setArg(0, devVisibilities); + setArg(1, devCorrectedData); + + size_t maxNrThreads, preferredMultiple; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + + std::vector<cl_context_properties> properties; + queue.getInfo<CL_QUEUE_CONTEXT>().getInfo(CL_CONTEXT_PROPERTIES, &properties); + + if (cl::Platform((cl_platform_id) properties[1]).getInfo<CL_PLATFORM_NAME>() == "AMD Accelerated Parallel Processing") + preferredMultiple = 256; + else + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, &preferredMultiple); + +#if defined USE_4X4 + unsigned quartStations = (ps.nrStations() + 2) / 4; + unsigned nrBlocks = quartStations * (quartStations + 1) / 2; +#elif defined USE_3X3 + unsigned thirdStations = (ps.nrStations() + 2) / 3; + unsigned nrBlocks = thirdStations * (thirdStations + 1) / 2; +#elif defined USE_2X2 + unsigned halfStations = (ps.nrStations() + 1) / 2; + unsigned nrBlocks = halfStations * 
(halfStations + 1) / 2; +#else + unsigned nrBlocks = ps.nrBaselines(); +#endif + unsigned nrPasses = (nrBlocks + maxNrThreads - 1) / maxNrThreads; + unsigned nrThreads = (nrBlocks + nrPasses - 1) / nrPasses; + nrThreads = (nrThreads + preferredMultiple - 1) / preferredMultiple * preferredMultiple; + //std::cout << "nrBlocks = " << nrBlocks << ", nrPasses = " << nrPasses << ", preferredMultiple = " << preferredMultiple << ", nrThreads = " << nrThreads << std::endl; + + globalWorkSize = cl::NDRange(nrPasses * nrThreads, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrThreads, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrBaselines() * ps.nrSamplesPerChannel() * 32; + nrBytesRead = (size_t) nrPasses * ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrBaselines() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * NR_POLARIZATIONS * sizeof(std::complex<float>); + } +}; + + +class IntToFloatKernel : public Kernel +{ + public: + IntToFloatKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devFilteredData, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "intToFloat") + { + setArg(0, devFilteredData); + setArg(1, devInputSamples); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + globalWorkSize = cl::NDRange(maxNrThreads, ps.nrStations()); + localWorkSize = cl::NDRange(maxNrThreads, 1); + + size_t nrSamples = ps.nrStations() * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS; + nrOperations = nrSamples * 2; + nrBytesRead = nrSamples * 2 * ps.nrBitsPerSample() / 8; + nrBytesWritten = nrSamples * sizeof(std::complex<float>); + } +}; + + +class IncoherentStokesKernel : public Kernel +{ + public: + IncoherentStokesKernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer 
&devIncoherentStokes, cl::Buffer &devInputSamples) + : + Kernel(ps, program, "incoherentStokes") + { + setArg(0, devIncoherentStokes); + setArg(1, devInputSamples); + + unsigned nrTimes = ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor(); + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned nrPasses = (nrTimes + maxNrThreads - 1) / maxNrThreads; + unsigned nrTimesPerPass = (nrTimes + nrPasses - 1) / nrPasses; + globalWorkSize = cl::NDRange(nrTimesPerPass * nrPasses, ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(nrTimesPerPass, 1); + + nrOperations = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrStations() * (ps.nrIncoherentStokes() == 1 ? 8 : 20 + 2.0 / ps.incoherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrIncoherentStokes() * nrTimes * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +class BeamFormerKernel : public Kernel +{ + public: + BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devCorrectedData, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devCorrectedData); + setArg(2, devBeamFormerWeights); + + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrChannelsPerSubband()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + // FIXME: nrTABs + //queue.enqueueNDRangeKernel(*this, cl::NullRange, cl::NDRange(16, ps.nrTABs(), ps.nrChannelsPerSubband()), cl::NDRange(16, ps.nrTABs(), 1), 0, &event); + + size_t count = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * 
sizeof(std::complex<float>); + size_t nrSampleBytesPerPass = count * ps.nrStations() * sizeof(std::complex<float>); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytesPerPass + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; + } +}; + + +class BeamFormerTransposeKernel : public Kernel +{ + public: + BeamFormerTransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTransposedData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "transposeComplexVoltages") + { + ASSERT(ps.nrSamplesPerChannel() % 16 == 0); + setArg(0, devTransposedData); + setArg(1, devComplexVoltages); + + //globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, ps.nrSamplesPerChannel() / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>), + //nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * sizeof(std::complex<float>); + } +}; + + +#if 0 +class Dedispersion_FFT_Kernel +{ + public: + Dedispersion_FFT_Kernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + ps(ps), + plan(context, ps.dedispersionFFTsize()), + buffer(buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, clFFT_Direction direction) + { + size_t nrFFTs = (size_t) 
ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(); + + cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event()); + + if (error != CL_SUCCESS) + throw cl::Error(error, "clFFT_ExecuteInterleaved"); + + counter.doOperation(event, + nrFFTs * 5 * ps.dedispersionFFTsize() * log2(ps.dedispersionFFTsize()), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>), + nrFFTs * ps.dedispersionFFTsize() * sizeof(std::complex<float>)); + } + + private: + const Parset &ps; + FFT_Plan plan; + cl::Buffer &buffer; + cl::Event event; +}; +#else +class DedispersionForwardFFTkernel : public FFT_Kernel +{ + public: + DedispersionForwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), true, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; + + +class DedispersionBackwardFFTkernel : public FFT_Kernel +{ + public: + DedispersionBackwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer) + : + FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), false, buffer) + { + ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0); + } +}; +#endif + + +class DedispersionChirpKernel : public Kernel +{ + public: + DedispersionChirpKernel(const Parset &ps, cl::Program &program, cl::CommandQueue &queue, cl::Buffer &buffer, cl::Buffer &DMs) + : + Kernel(ps, program, "applyChirp") + { + setArg(0, buffer); + setArg(1, DMs); + + size_t maxNrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + unsigned fftSize = ps.dedispersionFFTsize(); + + globalWorkSize = 
cl::NDRange(fftSize, ps.nrSamplesPerChannel() / fftSize, ps.nrChannelsPerSubband()); + //std::cout << "globalWorkSize = NDRange(" << fftSize << ", " << ps.nrSamplesPerChannel() / fftSize << ", " << ps.nrChannelsPerSubband() << ')' << std::endl; + + if (fftSize <= maxNrThreads) { + localWorkSize = cl::NDRange(fftSize, 1, maxNrThreads / fftSize); + //std::cout << "localWorkSize = NDRange(" << fftSize << ", 1, " << maxNrThreads / fftSize << ')' << std::endl; + } else { + unsigned divisor; + + for (divisor = 1; fftSize / divisor > maxNrThreads || fftSize % divisor != 0; divisor ++) + ; + + localWorkSize = cl::NDRange(fftSize / divisor, 1, 1); + //std::cout << "localWorkSize = NDRange(" << fftSize / divisor << ", 1, 1))" << std::endl; + } + + nrOperations = (size_t) NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * (9 * ps.nrTABs() + 17), + nrBytesRead = nrBytesWritten = sizeof(std::complex<float>) * ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel(); + } + + void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, double subbandFrequency) + { + setArg(2, (float) subbandFrequency); + Kernel::enqueue(queue, counter); + } +}; + + +class CoherentStokesKernel : public Kernel +{ + public: + CoherentStokesKernel(const Parset &ps, cl::Program &program, cl::Buffer &devStokesData, cl::Buffer &devComplexVoltages) + : + Kernel(ps, program, "coherentStokes") + { + ASSERT(ps.nrChannelsPerSubband() >= 16 && ps.nrChannelsPerSubband() % 16 == 0); + ASSERT(ps.nrCoherentStokes() == 1 || ps.nrCoherentStokes() == 4); + setArg(0, devStokesData); + setArg(1, devComplexVoltages); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * (ps.nrCoherentStokes() == 1 ? 
8 : 20 + 2.0 / ps.coherentStokesTimeIntegrationFactor()); + nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * ps.nrCoherentStokes() * ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor() * ps.nrChannelsPerSubband() * sizeof(float); + } +}; + + +template <typename SampleType> class UHEP_BeamFormerKernel : public Kernel +{ + public: + UHEP_BeamFormerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devComplexVoltages, cl::Buffer &devInputSamples, cl::Buffer &devBeamFormerWeights) + : + Kernel(ps, program, "complexVoltages") + { + setArg(0, devComplexVoltages); + setArg(1, devInputSamples); + setArg(2, devBeamFormerWeights); + +#if 1 + globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrSubbands()); + localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * sizeof(SampleType); + size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>); + unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes + (nrPasses - 1) * nrComplexVoltagesBytesPerPass; + nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass; +#else + ASSERT(ps.nrTABs() % 3 == 0); + ASSERT(ps.nrStations() % 6 == 0); + unsigned nrThreads = NR_POLARIZATIONS * (ps.nrTABs() / 3) * (ps.nrStations() / 6); + globalWorkSize = cl::NDRange(nrThreads, ps.nrSubbands()); + localWorkSize = cl::NDRange(nrThreads, 1); + //globalWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, ps.nrSubbands()); + //localWorkSize = 
cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, 1); + + size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS; + size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t nrSampleBytes = count * ps.nrStations() * sizeof(SampleType); + size_t nrComplexVoltagesBytes = count * ps.nrTABs() * sizeof(std::complex<float>); + nrOperations = count * ps.nrStations() * ps.nrTABs() * 8; + nrBytesRead = nrWeightsBytes + nrSampleBytes; + nrBytesWritten = nrComplexVoltagesBytes; +#endif + } +}; + + +class UHEP_TransposeKernel : public Kernel +{ + public: + UHEP_TransposeKernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData, cl::Buffer &devComplexVoltages, cl::Buffer &devReverseSubbandMapping) + : + Kernel(ps, program, "UHEP_Transpose") + { + setArg(0, devFFTedData); + setArg(1, devComplexVoltages); + setArg(2, devReverseSubbandMapping); + + globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, 512 / 16); + localWorkSize = cl::NDRange(256, 1, 1); + + nrOperations = 0; + nrBytesRead = (size_t) ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + } +}; + + +class UHEP_InvFFT_Kernel : public Kernel +{ + public: + UHEP_InvFFT_Kernel(const Parset &ps, cl::Program &program, cl::Buffer &devFFTedData) + : + Kernel(ps, program, "inv_fft") + { + setArg(0, devFFTedData); + setArg(1, devFFTedData); + + globalWorkSize = cl::NDRange(128, ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel()); + localWorkSize = cl::NDRange(128, 1); + + size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1); + nrOperations = nrFFTs * 5 * 1024 * 10; + nrBytesRead = 
nrFFTs * 512 * sizeof(std::complex<float>); + nrBytesWritten = nrFFTs * 1024 * sizeof(float); + } +}; + + +class UHEP_InvFIR_Kernel : public Kernel +{ + public: + UHEP_InvFIR_Kernel(const Parset &ps, cl::CommandQueue &queue, cl::Program &program, cl::Buffer &devInvFIRfilteredData, cl::Buffer &devFFTedData, cl::Buffer &devInvFIRfilterWeights) + : + Kernel(ps, program, "invFIRfilter") + { + setArg(0, devInvFIRfilteredData); + setArg(1, devFFTedData); + setArg(2, devInvFIRfilterWeights); + + size_t maxNrThreads, nrThreads; + getWorkGroupInfo(queue.getInfo<CL_QUEUE_DEVICE>(), CL_KERNEL_WORK_GROUP_SIZE, &maxNrThreads); + // round down to nearest power of two + for (nrThreads = 1024; nrThreads > maxNrThreads; nrThreads /= 2) + ; + + globalWorkSize = cl::NDRange(1024, NR_POLARIZATIONS, ps.nrTABs()); + localWorkSize = cl::NDRange(nrThreads, 1, 1); + + size_t count = ps.nrTABs() * NR_POLARIZATIONS * 1024; + nrOperations = count * ps.nrSamplesPerChannel() * NR_STATION_FILTER_TAPS * 2; + nrBytesRead = count * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * sizeof(float); + nrBytesWritten = count * ps.nrSamplesPerChannel() * sizeof(float); + } +}; + + +class UHEP_TriggerKernel : public Kernel +{ + public: + UHEP_TriggerKernel(const Parset &ps, cl::Program &program, cl::Buffer &devTriggerInfo, cl::Buffer &devInvFIRfilteredData) + : + Kernel(ps, program, "trigger") + { + setArg(0, devTriggerInfo); + setArg(1, devInvFIRfilteredData); + + globalWorkSize = cl::NDRange(16, 16, ps.nrTABs()); + localWorkSize = cl::NDRange(16, 16, 1); + + nrOperations = (size_t) ps.nrTABs() * ps.nrSamplesPerChannel() * 1024 * (3 /* power */ + 2 /* window */ + 1 /* max */ + 7 /* mean/variance */); + nrBytesRead = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 1024 * sizeof(float); + nrBytesWritten = (size_t) ps.nrTABs() * sizeof(TriggerInfo); + } +}; + + +WorkQueue::WorkQueue(Pipeline &pipeline) +: + gpu(omp_get_thread_num() % nrGPUs), + device(pipeline.devices[gpu]), + 
ps(pipeline.ps) +{ +#if defined __linux__ && defined USE_B7015 + set_affinity(gpu); +#endif + + queue = cl::CommandQueue(pipeline.context, device, profiling ? CL_QUEUE_PROFILING_ENABLE : 0); +} + + +template <typename SampleType> CorrelatorWorkQueue<SampleType>::CorrelatorWorkQueue(CorrelatorPipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + bandPassCorrectionWeights(boost::extents[ps.nrChannelsPerSubband()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + inputSamples(boost::extents[ps.nrStations()][(ps.nrSamplesPerChannel() + NR_TAPS - 1) * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) +{ + size_t firWeightsSize = ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float); + devFIRweights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, firWeightsSize); + + size_t filteredDataSize = ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>); + devFilteredData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); + devCorrectedData = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, filteredDataSize); + + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +template <typename SampleType> void CorrelatorWorkQueue<SampleType>::doWork() +{ + queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, 
ps.nrChannelsPerSubband() * NR_TAPS * sizeof(float), pipeline.filterBank.getWeights().origin()); + + FIR_FilterKernel<SampleType> firFilterKernel(ps, queue, pipeline.firFilterProgram, devFilteredData, inputSamples, devFIRweights); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + CorrelatorKernel correlatorKernel(ps, queue, pipeline.correlatorProgram, visibilities, devCorrectedData); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! 
+ if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if defined USE_TEST_DATA + if (subband == 0) + setTestPattern(); +#endif + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } + + if (ps.nrChannelsPerSubband() > 1) { + firFilterKernel.enqueue(queue, pipeline.firFilterCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + correlatorKernel.enqueue(queue, pipeline.correlatorCounter); + queue.finish(); + + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.deviceToHostLock[gpu / 2]); +#endif + visibilities.deviceToHost(CL_TRUE); + pipeline.visibilitiesCounter.doOperation(visibilities.event, 0, visibilities.bytesize(), 0); + } + +#if defined USE_TEST_DATA + if (subband == 0) + printTestOutput(); +#endif + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +// complexVoltages() +// float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS][NR_POLARIZATIONS]; +// transpose() +// +// float2 (*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// FFT() +// +// applyChirp() +// +// FFT-1() +// float2 
(*DedispersedDataType)[nrTABs][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()]; +// (*ComplexVoltagesType)[NR_CHANNELS][NR_TIMES_PER_BLOCK][NR_TABS]; +// computeStokes() +// float (*StokesType)[NR_TABS][NR_STOKES][NR_TIMES_PER_BLOCK / STOKES_INTEGRATION_SAMPLES][NR_CHANNELS]; + + +template <typename SampleType> BeamFormerWorkQueue<SampleType>::BeamFormerWorkQueue(BeamFormerPipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + inputSamples(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devFilteredData(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>)), + bandPassCorrectionWeights(boost::extents[ps.nrChannelsPerSubband()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAtBegin(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devCorrectedData(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY), + devComplexVoltages(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>))), + //transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE) + 
transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE), + DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY) +{ + if (ps.correctBandPass()) { + BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband()); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + } +} + + +template <typename SampleType> void BeamFormerWorkQueue<SampleType>::doWork() +{ + //queue.enqueueWriteBuffer(devFIRweights, CL_TRUE, 0, firWeightsSize, firFilterWeights); + bandPassCorrectionWeights.hostToDevice(CL_TRUE); + DMs.hostToDevice(CL_TRUE); + + IntToFloatKernel intToFloatKernel(ps, queue, pipeline.intToFloatProgram, devFilteredData, inputSamples); + Filter_FFT_Kernel fftKernel(ps, pipeline.context, devFilteredData); + DelayAndBandPassKernel delayAndBandPassKernel(ps, pipeline.delayAndBandPassProgram, devCorrectedData, devFilteredData, delaysAtBegin, delaysAfterEnd, phaseOffsets, bandPassCorrectionWeights); + BeamFormerKernel beamFormerKernel(ps, pipeline.beamFormerProgram, devComplexVoltages, devCorrectedData, beamFormerWeights); + BeamFormerTransposeKernel transposeKernel(ps, pipeline.transposeProgram, transposedComplexVoltages, devComplexVoltages); + DedispersionForwardFFTkernel dedispersionForwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionBackwardFFTkernel dedispersionBackwardFFTkernel(ps, pipeline.context, transposedComplexVoltages); + DedispersionChirpKernel dedispersionChirpKernel(ps, pipeline.dedispersionChirpProgram, queue, transposedComplexVoltages, DMs); + double startTime = ps.startTime(), currentTime, stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + +#pragma omp barrier + + double executionStartTime = getTime(); + + for (unsigned block = 0; (currentTime = startTime + block * blockTime) < stopTime; block ++) { +#pragma omp single +#pragma omp critical (cout) + 
std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + + memset(delaysAtBegin.origin(), 0, delaysAtBegin.bytesize()); + memset(delaysAfterEnd.origin(), 0, delaysAfterEnd.bytesize()); + memset(phaseOffsets.origin(), 0, phaseOffsets.bytesize()); + + // FIXME!!! + if (ps.nrStations() >= 3) + delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6; + + delaysAtBegin.hostToDevice(CL_FALSE); + delaysAfterEnd.hostToDevice(CL_FALSE); + phaseOffsets.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + queue.finish(); + +#pragma omp barrier + +#pragma omp for schedule(dynamic) + for (unsigned subband = 0; subband < ps.nrSubbands(); subband ++) { + try { +#if 1 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + inputSamples.hostToDevice(CL_TRUE); + pipeline.samplesCounter.doOperation(inputSamples.event, 0, 0, inputSamples.bytesize()); + } +#endif + +//#pragma omp critical (GPU) +{ + if (ps.nrChannelsPerSubband() > 1) { + intToFloatKernel.enqueue(queue, pipeline.intToFloatCounter); + fftKernel.enqueue(queue, pipeline.fftCounter); + } + + delayAndBandPassKernel.enqueue(queue, pipeline.delayAndBandPassCounter, subband); + beamFormerKernel.enqueue(queue, pipeline.beamFormerCounter); + transposeKernel.enqueue(queue, pipeline.transposeCounter); + dedispersionForwardFFTkernel.enqueue(queue, pipeline.dedispersionForwardFFTcounter); + dedispersionChirpKernel.enqueue(queue, pipeline.dedispersionChirpCounter, ps.subbandToFrequencyMapping()[subband]); + dedispersionBackwardFFTkernel.enqueue(queue, pipeline.dedispersionBackwardFFTcounter); + + queue.finish(); +} + + //queue.enqueueReadBuffer(devComplexVoltages, CL_TRUE, 0, hostComplexVoltages.bytesize(), hostComplexVoltages.origin()); + //dedispersedData.deviceToHost(CL_TRUE); + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << 
errorMessage(error.err()) << std::endl << error; + exit(1); + } + } + } + +#pragma omp barrier + +#pragma omp master + if (!profiling) +#pragma omp critical (cout) + std::cout << "run time = " << getTime() - executionStartTime << std::endl; +} + + +template <typename SampleType> UHEP_WorkQueue<SampleType>::UHEP_WorkQueue(UHEP_Pipeline &pipeline) +: + WorkQueue(pipeline), + pipeline(pipeline), + hostInputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY), + hostBeamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY), + hostTriggerInfo(ps.nrTABs(), queue, CL_MEM_READ_ONLY) +{ + size_t inputSamplesSize = ps.nrStations() * ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS * sizeof(SampleType); + size_t complexVoltagesSize = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>); + size_t transposedDataSize = ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>); + size_t invFIRfilteredDataSize = ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 512 * sizeof(std::complex<float>); + + size_t buffer0size = std::max(inputSamplesSize, transposedDataSize); + size_t buffer1size = std::max(complexVoltagesSize, invFIRfilteredDataSize); + + devBuffers[0] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer0size); + devBuffers[1] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer1size); + + size_t beamFormerWeightsSize = ps.nrStations() * ps.nrSubbands() * ps.nrTABs() * sizeof(std::complex<float>); + devBeamFormerWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, beamFormerWeightsSize); + + devInputSamples = devBuffers[0]; + devComplexVoltages = devBuffers[1]; + + devReverseSubbandMapping = 
cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 512 * sizeof(int)); + devInvFIRfilterWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, 1024 * NR_STATION_FILTER_TAPS * sizeof(float)); + devFFTedData = devBuffers[0]; + devInvFIRfilteredData = devBuffers[1]; + + devTriggerInfo = cl::Buffer(pipeline.context, CL_MEM_WRITE_ONLY, ps.nrTABs() * sizeof(TriggerInfo)); +} + + +template <typename SampleType> void UHEP_WorkQueue<SampleType>::doWork(const float * /*delaysAtBegin*/, const float * /*delaysAfterEnd*/, const float * /*phaseOffsets*/) +{ + UHEP_BeamFormerKernel<SampleType> beamFormer(ps, pipeline.beamFormerProgram, devComplexVoltages, devInputSamples, devBeamFormerWeights); + UHEP_TransposeKernel transpose(ps, pipeline.transposeProgram, devFFTedData, devComplexVoltages, devReverseSubbandMapping); + UHEP_InvFFT_Kernel invFFT(ps, pipeline.invFFTprogram, devFFTedData); + UHEP_InvFIR_Kernel invFIR(ps, queue, pipeline.invFIRfilterProgram, devInvFIRfilteredData, devFFTedData, devInvFIRfilterWeights); + UHEP_TriggerKernel trigger(ps, pipeline.triggerProgram, devTriggerInfo, devInvFIRfilteredData); + double startTime = ps.startTime(), stopTime = ps.stopTime(), blockTime = ps.CNintegrationTime(); + unsigned nrBlocks = (stopTime - startTime) / blockTime; + + queue.enqueueWriteBuffer(devInvFIRfilterWeights, CL_FALSE, 0, sizeof invertedStationPPFWeights, invertedStationPPFWeights); + queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_TRUE, 0, 512 * sizeof(int), reverseSubbandMapping); + +#pragma omp barrier + + double executionStartTime = getTime(); + +#pragma omp for schedule(dynamic) + for (unsigned block = 0; block < nrBlocks; block ++) { + try { + double currentTime = startTime + block * blockTime; + +//#pragma omp single // FIXME: why does the compiler complain here??? 
+#pragma omp critical (cout) + std::cout << "block = " << block << ", time = " << to_simple_string(from_ustime_t(currentTime)) << std::endl; + +#if 0 + { +#if defined USE_B7015 + OMP_ScopedLock scopedLock(pipeline.hostToDeviceLock[gpu / 2]); +#endif + queue.enqueueWriteBuffer(devInputSamples, CL_TRUE, 0, sampledDataSize, hostInputSamples.origin(), 0, &samplesEvent); + } +#endif + + queue.enqueueWriteBuffer(devBeamFormerWeights, CL_FALSE, 0, hostBeamFormerWeights.bytesize(), hostBeamFormerWeights.origin(), 0, &beamFormerWeightsEvent); + pipeline.beamFormerWeightsCounter.doOperation(beamFormerWeightsEvent, 0, 0, hostBeamFormerWeights.bytesize()); + + queue.enqueueWriteBuffer(devInputSamples, CL_FALSE, 0, hostInputSamples.bytesize(), hostInputSamples.origin(), 0, &inputSamplesEvent); + pipeline.samplesCounter.doOperation(inputSamplesEvent, 0, 0, hostInputSamples.bytesize()); + + beamFormer.enqueue(queue, pipeline.beamFormerCounter); + transpose.enqueue(queue, pipeline.transposeCounter); + invFFT.enqueue(queue, pipeline.invFFTcounter); + invFIR.enqueue(queue, pipeline.invFIRfilterCounter); + trigger.enqueue(queue, pipeline.triggerCounter); + queue.finish(); // necessary to overlap I/O & computations ??? 
+      queue.enqueueReadBuffer(devTriggerInfo, CL_TRUE, 0, hostTriggerInfo.size() * sizeof(TriggerInfo), &hostTriggerInfo[0]);
+    } catch (cl::Error &error) {
+#pragma omp critical (cerr)
+      std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error;
+      exit(1);
+    }
+  }
+
+#pragma omp barrier
+
+#pragma omp master
+  if (!profiling)
+#pragma omp critical (cout)
+    std::cout << "run time = " << getTime() - executionStartTime << std::endl;
+}
+
+
+#if defined USE_TEST_DATA
+
+// Writes a complex sinusoid into polarization 1 of station 2 of the host
+// input buffer. The tone sits in channel 230, or in the middle channel when
+// there are fewer than 231 channels per subband. Nothing is written when
+// fewer than 3 stations are configured. Amplitude is 32767 for 4-byte sample
+// types and 127 for 2-byte sample types; other sample sizes are left as is.
+//
+// The pattern is written into inputSamples because that is the buffer
+// CorrelatorWorkQueue::doWork() uploads with inputSamples.hostToDevice();
+// its extents match the indexing used here.
+template <typename SampleType> void CorrelatorWorkQueue<SampleType>::setTestPattern()
+{
+  if (ps.nrStations() >= 3) {
+    double centerFrequency = 384 * ps.sampleRate();
+    double baseFrequency = centerFrequency - .5 * ps.sampleRate();
+    unsigned testSignalChannel = ps.nrChannelsPerSubband() >= 231 ? 230 : ps.nrChannelsPerSubband() / 2;
+    double signalFrequency = baseFrequency + testSignalChannel * ps.sampleRate() / ps.nrChannelsPerSubband();
+
+    for (unsigned time = 0; time < (NR_TAPS - 1 + ps.nrSamplesPerChannel()) * ps.nrChannelsPerSubband(); time ++) {
+      double phi = 2.0 * M_PI * signalFrequency * time / ps.sampleRate();
+
+      switch (sizeof(SampleType)) {
+        case 4 : inputSamples[2][time][1] = SampleType((short) rint(32767 * cos(phi)), (short) rint(32767 * sin(phi)));
+                 break;
+
+        case 2 : inputSamples[2][time][1] = SampleType((signed char) rint(127 * cos(phi)), (signed char) rint(127 * sin(phi)));
+                 break;
+      }
+    }
+  }
+}
+
+
+// Prints a plot header line followed by channels 228..232 of
+// visibilities[5][channel][1][1]. The header is printed only when at least 6
+// baselines exist; the channel values only when there are exactly 256
+// channels per subband. Reads from visibilities, the buffer doWork() fills
+// with visibilities.deviceToHost().
+template <typename SampleType> void CorrelatorWorkQueue<SampleType>::printTestOutput()
+{
+  if (ps.nrBaselines() >= 6)
+#pragma omp critical (cout)
+  {
+    std::cout << "newgraph newcurve linetype solid pts" << std::endl;
+
+    //for (int channel = 0; channel < ps.nrChannelsPerSubband(); channel ++)
+    if (ps.nrChannelsPerSubband() == 256)
+      for (int channel = 228; channel <= 232; channel ++)
+        std::cout << channel << ' ' << visibilities[5][channel][1][1] << std::endl;
+  }
+}
+
+#endif
+
+
+void CorrelatorPipeline::doWork()
+{ 
+#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs)
+  try
+  {
+    switch (ps.nrBitsPerSample()) {
+      case 4 : CorrelatorWorkQueue<LOFAR::i4complex>(*this).doWork();
+               break;
+
+      case 8 : CorrelatorWorkQueue<std::complex<signed char> >(*this).doWork();
+               break;
+
+      case 16 : CorrelatorWorkQueue<std::complex<short> >(*this).doWork();
+                break;
+    }
+  } catch (cl::Error &error) {
+#pragma omp critical (cerr)
+    std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error;
+    exit(1);
+  }
+}
+
+
+// Runs the beam-former pipeline. Each thread of an OpenMP parallel region
+// ((profiling ? 1 : 2) threads per GPU) constructs a BeamFormerWorkQueue for
+// the sample type selected by ps.nrBitsPerSample() (4, 8 or 16) and calls its
+// doWork(). Any other bit width matches no case and does nothing. An OpenCL
+// error is reported on std::cerr and terminates the process with exit(1).
+void BeamFormerPipeline::doWork()
+{
+#pragma omp parallel num_threads((profiling ? 1 : 2) * nrGPUs)
+  try
+  {
+    switch (ps.nrBitsPerSample()) {
+      case 4 : BeamFormerWorkQueue<LOFAR::i4complex>(*this).doWork();
+               break;
+
+      case 8 : BeamFormerWorkQueue<std::complex<signed char> >(*this).doWork();
+               break;
+
+      case 16 : BeamFormerWorkQueue<std::complex<short> >(*this).doWork();
+                break;
+    }
+  } catch (cl::Error &error) {
+#pragma omp critical (cerr)
+    std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error;
+    exit(1);
+  }
+}
+
+
+// Runs the UHEP pipeline. Allocates zero-filled delay and phase-offset arrays
+// on the stack (variable-length arrays sized from the parset), plants a
+// hard-coded test delay for station 2, and then hands pointers to these
+// arrays to one UHEP_WorkQueue per OpenMP thread.
+void UHEP_Pipeline::doWork()
+{
+  float delaysAtBegin[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32)));
+  float delaysAfterEnd[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32)));
+  float phaseOffsets[ps.nrStations()][NR_POLARIZATIONS] __attribute__((aligned(32)));
+
+  memset(delaysAtBegin, 0, sizeof delaysAtBegin);
+  memset(delaysAfterEnd, 0, sizeof delaysAfterEnd);
+  memset(phaseOffsets, 0, sizeof phaseOffsets);
+
+  // The test delay addresses station index 2. Without this guard the write
+  // runs past the end of the stack arrays when fewer than 3 stations are
+  // configured. The correlator and beam-former work queues apply the same
+  // check before writing their test delay.
+  if (ps.nrStations() >= 3)
+    delaysAtBegin[0][2][0] = 1e-6, delaysAfterEnd[0][2][0] = 1.1e-6;
+
+#pragma omp parallel num_threads((profiling ? 
1 : 2) * nrGPUs) + try + { + switch (ps.nrBitsPerSample()) { + case 4 : UHEP_WorkQueue<LOFAR::i4complex>(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + + case 8 : UHEP_WorkQueue<std::complex<signed char> >(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + + case 16 : UHEP_WorkQueue<std::complex<short> >(*this).doWork(&delaysAtBegin[0][0][0], &delaysAfterEnd[0][0][0], &phaseOffsets[0][0]); + break; + } + } catch (cl::Error &error) { +#pragma omp critical (cerr) + std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error; + exit(1); + } +} + + +class UnitTest +{ + protected: + UnitTest(const Parset &ps, const char *programName = 0) + : + counter(programName != 0 ? programName : "test") + { + createContext(context, devices); + queue = cl::CommandQueue(context, devices[0], CL_QUEUE_PROFILING_ENABLE); + + if (programName != 0) + program = createProgram(ps, context, devices, programName); + } + + template <typename T> void check(T actual, T expected) + { + if (expected != actual) { + std::cerr << "Test FAILED: expected " << expected << ", computed " << actual << std::endl; + exit(1); + } else { + std::cout << "Test OK" << std::endl; + } + } + + cl::Context context; + std::vector<cl::Device> devices; + cl::Program program; + cl::CommandQueue queue; + + PerformanceCounter counter; +}; + + +struct CorrelatorTest : public UnitTest +{ + CorrelatorTest(const Parset &ps) + : + UnitTest(ps, "Correlator.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 6 && ps.nrSamplesPerChannel() >= 100) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> 
visibilities(boost::extents[ps.nrBaselines()][ps.nrChannelsPerSubband()][NR_POLARIZATIONS][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + CorrelatorKernel correlator(ps, queue, program, visibilities, inputData); + + inputData[3][5][99][1] = std::complex<float>(3, 4); + inputData[4][5][99][1] = std::complex<float>(5, 6); + + inputData.hostToDevice(CL_FALSE); + correlator.enqueue(queue, counter); + visibilities.deviceToHost(CL_TRUE); + + check(visibilities[13][5][1][1], std::complex<float>(39, 2)); + } + } +}; + + +struct IncoherentStokesTest : public UnitTest +{ + IncoherentStokesTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IncoherentStokes.cl") + { + if (ps.nrStations() >= 5 && ps.nrChannelsPerSubband() >= 14 && ps.nrSamplesPerChannel() >= 108) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<float, 3> stokesData(boost::extents[ps.nrIncoherentStokes()][ps.nrSamplesPerChannel() / ps.incoherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + IncoherentStokesKernel kernel(ps, queue, program, stokesData, inputData); + + inputData[4][13][107][0] = std::complex<float>(2, 3); + inputData[4][13][107][1] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + stokesData.deviceToHost(CL_TRUE); + + const static float expected[] = { 54, -28, 46, 4 }; + + for (unsigned stokes = 0; stokes < ps.nrIncoherentStokes(); stokes ++) + check(stokesData[stokes][107 / ps.incoherentStokesTimeIntegrationFactor()][13], expected[stokes]); + } + } +}; + + +template <typename SampleType> struct IntToFloatTest : public UnitTest +{ + IntToFloatTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/IntToFloat.cl") + { + if (ps.nrStations() >= 3 && ps.nrSamplesPerChannel() * 
ps.nrChannelsPerSubband() >= 10077) { + MultiArraySharedBuffer<SampleType, 3> inputData(boost::extents[ps.nrStations()][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> outputData(boost::extents[ps.nrStations()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + IntToFloatKernel kernel(ps, queue, program, outputData, inputData); + + inputData[2][10076][1] = 7; + inputData.hostToDevice(CL_FALSE); + kernel.enqueue(queue, counter); + outputData.deviceToHost(CL_TRUE); + check(outputData[2][1][10076], std::complex<float>(7.0f, 0)); + } + } +}; + + +struct BeamFormerTest : public UnitTest +{ + BeamFormerTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/BeamFormer.cl") + { + if (ps.nrStations() >= 5 && ps.nrSamplesPerChannel() >= 13 && ps.nrChannelsPerSubband() >= 7 && ps.nrTABs() >= 6) { + MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE); + BeamFormerKernel beamFormer(ps, program, complexVoltages, inputData, beamFormerWeights); + + inputData[4][6][12][1] = std::complex<float>(2.2, 3); + beamFormerWeights[4][6][5] = std::complex<float>(4, 5); + + inputData.hostToDevice(CL_FALSE); + beamFormerWeights.hostToDevice(CL_FALSE); + beamFormer.enqueue(queue, counter); + complexVoltages.deviceToHost(CL_TRUE); + + check(complexVoltages[6][12][5][1], 
std::complex<float>(-6.2, 23)); + +#if 0 + for (unsigned tab = 0; tab < ps.nrTABs(); tab ++) + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) + for (unsigned ch = 0; ch < ps.nrChannelsPerSubband(); ch ++) + for (unsigned t = 0; t < ps.nrSamplesPerChannel(); t ++) + if (complexVoltages[tab][pol][ch][t] != std::complex<float>(0, 0)) + std::cout << "complexVoltages[" << tab << "][" << pol << "][" << ch << "][" << t << "] = " << complexVoltages[tab][pol][ch][t] << std::endl; +#endif + } + } +}; + + +struct BeamFormerTransposeTest : public UnitTest +{ + BeamFormerTransposeTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Transpose.cl") + { + if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { + MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY); + MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY); + BeamFormerTransposeKernel transpose(ps, program, transposedData, complexVoltages); + + complexVoltages[18][174][4][1] = std::complex<float>(24, 42); + + complexVoltages.hostToDevice(CL_FALSE); + transpose.enqueue(queue, counter); + transposedData.deviceToHost(CL_TRUE); + + check(transposedData[4][1][174][18], std::complex<float>(24, 42)); + } + } +}; + + +struct DedispersionChirpTest : public UnitTest +{ + DedispersionChirpTest(const Parset &ps) + : + UnitTest(ps, "BeamFormer/Dedispersion.cl") + { + if (ps.nrTABs() > 3 && ps.nrChannelsPerSubband() > 13 && ps.nrSamplesPerChannel() / ps.dedispersionFFTsize() > 1 && ps.dedispersionFFTsize() > 77) { + MultiArraySharedBuffer<std::complex<float>, 5> data(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel() / 
ps.dedispersionFFTsize()][ps.dedispersionFFTsize()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_WRITE);
+      MultiArraySharedBuffer<float, 1> DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
+      DedispersionChirpKernel dedispersionChirpKernel(ps, program, queue, data, DMs);
+
+      data[3][1][13][1][77] = std::complex<float>(2, 3);
+      DMs[3] = 2;
+
+      DMs.hostToDevice(CL_FALSE);
+      data.hostToDevice(CL_FALSE);
+      dedispersionChirpKernel.enqueue(queue, counter, 60e6);
+      data.deviceToHost(CL_TRUE);
+
+      std::cout << data[3][1][13][1][77] << std::endl;
+    }
+  }
+};
+
+// Smoke test for BeamFormer/CoherentStokes.cl: sets both polarizations of one sample, runs the kernel and prints every Stokes value of the matching output cell (nothing is check()ed).
+struct CoherentStokesTest : public UnitTest
+{
+  CoherentStokesTest(const Parset &ps)
+  :
+    UnitTest(ps, "BeamFormer/CoherentStokes.cl")
+  {
+    if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) { // parset must be large enough for the indices 18 / 174 / 4 used below
+      MultiArraySharedBuffer<float, 4> stokesData(boost::extents[ps.nrTABs()][ps.nrCoherentStokes()][ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
+#if 1 // input layout [channel][sample][tab][pol]
+      MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
+      CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages);
+
+      complexVoltages[18][174][4][0] = std::complex<float>(2, 3);
+      complexVoltages[18][174][4][1] = std::complex<float>(4, 5);
+#else // disabled alternative layout [tab][pol][sample][channel]; NOTE(review): the indices below still follow the #if 1 layout and would be out of range here
+      MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
+      CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages);
+
+      complexVoltages[18][174][4][0] = std::complex<float>(2, 3);
+      complexVoltages[18][174][4][1] = std::complex<float>(4, 5);
+#endif
+
+      complexVoltages.hostToDevice(CL_FALSE);
+      stokesKernel.enqueue(queue, counter);
+      stokesData.deviceToHost(CL_TRUE);
+
+      for (unsigned stokes = 0; stokes < ps.nrCoherentStokes(); stokes ++)
+        std::cout << stokesData[4][stokes][174 / ps.coherentStokesTimeIntegrationFactor()][18] << std::endl; // time index is divided by the integration factor, matching the stokesData extents
+    }
+  }
+};
+
+// Test for UHEP/BeamFormer.cl: a single non-zero sample (2+3i) and a single non-zero weight (4+5i) must produce their complex product -7+22i in the matching output cell.
+template <typename SampleType> struct UHEP_BeamFormerTest : public UnitTest
+{
+  UHEP_BeamFormerTest(const Parset &ps)
+  :
+    UnitTest(ps, "UHEP/BeamFormer.cl")
+  {
+    if (ps.nrStations() >= 5 && (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) >= 13 && ps.nrSubbands() >= 7 && ps.nrTABs() >= 6) { // parset must be large enough for station 4, time 12, subband 6, tab 5
+      MultiArraySharedBuffer<SampleType, 4> inputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
+      MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
+      MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE);
+      UHEP_BeamFormerKernel<SampleType> beamFormer(ps, program, complexVoltages, inputSamples, beamFormerWeights);
+
+      inputSamples[4][6][12][1] = SampleType(2, 3);
+      beamFormerWeights[4][6][5] = SampleType(4, 5); // NOTE(review): the weights buffer holds std::complex<float>; the value is built as SampleType and presumably converted on assignment
+
+      inputSamples.hostToDevice(CL_FALSE);
+      beamFormerWeights.hostToDevice(CL_FALSE);
+      beamFormer.enqueue(queue, counter);
+      complexVoltages.deviceToHost(CL_TRUE);
+
+      check(complexVoltages[6][12][5][1], std::complex<float>(-7, 22));
+    }
+  }
+};
+
+// Test for UHEP/Transpose.cl: the value written at [subband 18][time 174][tab 4][pol 1] must reappear at transposedData[tab 4][pol 1][time 174][38].
+struct UHEP_TransposeTest : public UnitTest
+{
+  UHEP_TransposeTest(const Parset &ps)
+  :
+    UnitTest(ps, "UHEP/Transpose.cl")
+  {
+    if (ps.nrSubbands() >= 19 && ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 >= 175 && ps.nrTABs() >= 5) {
+      MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][512], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
+      MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
+      cl::Buffer devReverseSubbandMapping(context, CL_MEM_READ_ONLY, 512 * sizeof(int));
+      UHEP_TransposeKernel transpose(ps, program, transposedData, complexVoltages, devReverseSubbandMapping);
+
+      complexVoltages[18][174][4][1] = std::complex<float>(24, 42);
+
+      queue.enqueueWriteBuffer(devReverseSubbandMapping, CL_FALSE, 0, 512 * sizeof(int), reverseSubbandMapping); // reverseSubbandMapping is defined elsewhere; it must provide at least 512 ints
+      complexVoltages.hostToDevice(CL_FALSE);
+      transpose.enqueue(queue, counter);
+      transposedData.deviceToHost(CL_TRUE);
+
+      check(transposedData[4][1][174][38], std::complex<float>(24, 42)); // NOTE(review): column 38 presumably follows from reverseSubbandMapping for subband 18 -- confirm
+    }
+  }
+};
+
+// Test for UHEP/Trigger.cl: a single spike of 1000 injected for tab 3 must be reported with bestValue 1000*1000 and bestApproxIndex 100016.
+struct UHEP_TriggerTest : public UnitTest
+{
+  UHEP_TriggerTest(const Parset &ps)
+  :
+    UnitTest(ps, "UHEP/Trigger.cl")
+  {
+    if (ps.nrTABs() >= 4 && 1024 * ps.nrSamplesPerChannel() > 100015) { // input must be long enough to hold sample index 100015
+      MultiArraySharedBuffer<float, 3> inputData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * 1024], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
+      MultiArraySharedBuffer<TriggerInfo, 1> triggerInfo(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
+      UHEP_TriggerKernel trigger(ps, program, triggerInfo, inputData);
+
+      inputData[3][1][100015] = 1000;
+
+      inputData.hostToDevice(CL_FALSE);
+      trigger.enqueue(queue, counter);
+      triggerInfo.deviceToHost(CL_TRUE);
+
+      std::cout << "trigger info: mean = " << triggerInfo[3].mean << ", variance = " << triggerInfo[3].variance << ", bestValue = " << triggerInfo[3].bestValue << ", bestApproxIndex = " << triggerInfo[3].bestApproxIndex << std::endl;
+      //check(triggerInfo[3].mean, (float) (1000.0f * 1000.0f) / (float) (ps.nrSamplesPerChannel() * 1024));
+      check(triggerInfo[3].bestValue, 1000.0f * 1000.0f);
+      check(triggerInfo[3].bestApproxIndex, 100016U); // NOTE(review): one past the injected index 100015; presumably a property of the kernel's approximate index -- confirm
+    }
+  }
+};
+
+// Disabled experiment: 8-point forward FFT through clAmdFft (or, in the inner #else, a hand-written "fft_fwd" kernel); prints the output instead of checking it.
+#if 0
+struct FFT_Test : public UnitTest
+{
+  FFT_Test(const Parset &ps)
+  : UnitTest(ps, "fft.cl")
+  {
+    MultiArraySharedBuffer<std::complex<float>, 1> in(boost::extents[8], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
+    MultiArraySharedBuffer<std::complex<float>, 1> out(boost::extents[8], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
+
+    for (unsigned i = 0; i < 8; i ++)
+      in[i] = std::complex<float>(2 * i + 1, 2 * i + 2); // input is 1+2i, 3+4i, ..., 15+16i
+
+    clAmdFftSetupData setupData;
+    cl::detail::errHandler(clAmdFftInitSetupData(&setupData), "clAmdFftInitSetupData");
+    setupData.debugFlags = CLFFT_DUMP_PROGRAMS;
+    cl::detail::errHandler(clAmdFftSetup(&setupData), "clAmdFftSetup");
+
+    clAmdFftPlanHandle plan;
+    size_t dim[1] = { 8 };
+
+    cl::detail::errHandler(clAmdFftCreateDefaultPlan(&plan, context(), CLFFT_1D, dim), "clAmdFftCreateDefaultPlan");
+    cl::detail::errHandler(clAmdFftSetResultLocation(plan, CLFFT_OUTOFPLACE), "clAmdFftSetResultLocation");
+    cl::detail::errHandler(clAmdFftSetPlanBatchSize(plan, 1), "clAmdFftSetPlanBatchSize");
+    cl::detail::errHandler(clAmdFftBakePlan(plan, 1, &queue(), 0, 0), "clAmdFftBakePlan");
+
+    in.hostToDevice(CL_FALSE);
+    cl_mem ins[1] = { ((cl::Buffer) in)() };
+    cl_mem outs[1] = { ((cl::Buffer) out)() };
+#if 1
+    cl::detail::errHandler(clAmdFftEnqueueTransform(plan, CLFFT_FORWARD, 1, &queue(), 0, 0, 0, ins, outs, 0), "clAmdFftEnqueueTransform");
+#else
+    cl::Kernel kernel(program, "fft_fwd");
+    kernel.setArg(0, (cl::Buffer) in);
+    kernel.setArg(1, (cl::Buffer) out);
+    queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(64, 1, 1), cl::NDRange(64, 1, 1));
+#endif
+    out.deviceToHost(CL_TRUE);
+
+    for (unsigned i = 0; i < 8; i ++)
+      std::cout << out[i] << std::endl;
+
+    cl::detail::errHandler(clAmdFftDestroyPlan(&plan), "clAmdFftDestroyPlan");
+    cl::detail::errHandler(clAmdFftTeardown(), "clAmdFftTeardown");
+  }
+};
+#endif
+
+
+} // namespace RTCP
+} // namespace LOFAR
+// Entry point: takes exactly one argument (the parset file) and runs the correlator pipeline twice, first with profiling off and then with profiling on; every other pipeline/test is disabled with #if 0 or commented out.
+int main(int argc, char **argv)
+{
+  using namespace LOFAR::RTCP;
+
+  std::cout << "running ..." << std::endl;
+  // NOTE(review): DISPLAY is overwritten unconditionally; presumably required by the OpenCL driver -- confirm
+  if (setenv("DISPLAY", ":0.0", 1) < 0) {
+    perror("error setting DISPLAY");
+    exit(1);
+  }
+
+  if (argc != 2) {
+    std::cerr << "usage: " << argv[0] << " parset" << std::endl;
+    exit(1);
+  }
+
+#if 0 && defined __linux__
+  set_affinity(0);
+#endif
+
+  try {
+    Parset ps(argv[1]);
+
+#if 0
+    const char *str = getenv("NR_STATIONS");
+    ps.nrStations() = str ? atoi(str) : 77;
+#endif
+    std::cout << "nr stations = " << ps.nrStations() << std::endl;
+
+    const char *str = getenv("NR_GPUS");
+    nrGPUs = str ? atoi(str) : 1; // defaults to 1 when NR_GPUS is unset; the value is not validated
+
+#if 0
+    ps.nrSubbands() = 10;//488;
+    ps.nrChannelsPerSubband() = 64;
+    ps.nrBeams() = 1;
+    ps.nrSamplesPerChannel() = 196608 / ps.nrChannelsPerSubband();
+    ps.subbandBandwidth() = 195312.5;
+    ps.correctBandPass() = true;
+#endif
+
+    profiling = false; CorrelatorPipeline(ps).doWork();
+    profiling = true; CorrelatorPipeline(ps).doWork();
+
+    //(CorrelatorTest)(ps);
+
+#if 0
+    ps.nrSubbands() = 488;
+    ps.nrChannelsPerSubband() = 2048;
+    ps.nrBeams() = 1;
+    ps.nrTABs() = 128;
+    ps.nrIncoherentStokes() = 4;
+    ps.nrCoherentStokes() = 4;
+    ps.incoherentStokesTimeIntegrationFactor() = 8;
+    ps.coherentStokesTimeIntegrationFactor() = 8;
+    ps.nrSamplesPerChannel() = 65536 / ps.nrChannelsPerSubband();//262144 / ps.nrChannelsPerSubband();
+    ps.subbandBandwidth() = 195312.5;
+    ps.correctBandPass() = true;
+    ps.dedispersionFFTsize() = ps.nrSamplesPerChannel();
+
+    profiling = false; BeamFormerPipeline(ps).doWork();
+    profiling = true; BeamFormerPipeline(ps).doWork();
+    //(IncoherentStokesTest)(ps);
+    //(IntToFloatTest)(ps);
+    //(BeamFormerTest)(ps);
+    //(BeamFormerTransposeTest)(ps);
+    //(DedispersionChirpTest)(ps);
+    //(CoherentStokesTest)(ps);
+#endif
+
+#if 0
+    ps.nrSubbands() = 488;
+    ps.nrSamplesPerChannel() = 1024;
+    ps.nrBeams() = 1;
+    ps.subbandBandwidth() = 195312.5;
+    ps.nrTABs() = 48;
+
+    profiling = false; UHEP_Pipeline(ps).doWork();
+    profiling = true; UHEP_Pipeline(ps).doWork();
+    //(UHEP_BeamFormerTest)(ps);
+    //(UHEP_TransposeTest)(ps);
+    //(UHEP_TriggerTest)(ps);
+#endif
+
+#if 0
+    (FFT_Test)(ps);
+#endif
+  } catch (cl::Error &error) { // only OpenCL errors are handled here; any other exception propagates out of main
+#pragma omp critical (cerr)
+    std::cerr << "OpenCL error: " << error.what() << ": " << errorMessage(error.err()) << std::endl << error;
+    exit(1); // not part of the critical section: the pragma covers only the output statement above
+  }
+
+  return 0;
+}
diff --git a/RTCP/GPUProc/src/ReaderWriterSynchronization.cc b/RTCP/GPUProc/src/ReaderWriterSynchronization.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6c419634594b8fcc41b1a44939c3dd9f18e0fc23
--- /dev/null
+++ b/RTCP/GPUProc/src/ReaderWriterSynchronization.cc
@@ -0,0 +1,124 @@
+//#
+//#
+//# Copyright (C) 2000, 2001
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: ReaderWriterSynchronization.cc 17893 2011-04-29 09:04:10Z romein $
+
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#include <ReaderWriterSynchronization.h>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+// Out-of-line destructor of the abstract synchronization interface.
+ReaderAndWriterSynchronization::~ReaderAndWriterSynchronization()
+{
+}
+
+
+
+// bufferSize is the amount subtracted from `end` in startWrite(); presumably the capacity of the shared buffer in TimeStamp units -- confirm.
+SynchronizedReaderAndWriter::SynchronizedReaderAndWriter(unsigned bufferSize)
+:
+  itsBufferSize(bufferSize)
+{
+}
+
+
+SynchronizedReaderAndWriter::~SynchronizedReaderAndWriter()
+{
+}
+
+// Moves the read pointer to `begin`, then waits until the write pointer has reached `end`.
+void SynchronizedReaderAndWriter::startRead(const TimeStamp &begin, const TimeStamp &end)
+{
+  itsReadPointer.advanceTo(begin);
+  itsWritePointer.waitFor(end);
+}
+
+// Moves the read pointer forward to `advanceTo`.
+void SynchronizedReaderAndWriter::finishedRead(const TimeStamp &advanceTo)
+{
+  itsReadPointer.advanceTo(advanceTo);
+}
+
+// Moves the write pointer to `begin`, then waits until the read pointer has reached `end - itsBufferSize`.
+void SynchronizedReaderAndWriter::startWrite(const TimeStamp &begin, const TimeStamp &end)
+{
+  itsWritePointer.advanceTo(begin);
+  itsReadPointer.waitFor(end - itsBufferSize);
+}
+
+// Moves the write pointer forward to `advanceTo`.
+void SynchronizedReaderAndWriter::finishedWrite(const TimeStamp &advanceTo)
+{
+  itsWritePointer.advanceTo(advanceTo);
+}
+
+
+void SynchronizedReaderAndWriter::noMoreReading()
+{
+  // advance read pointer to infinity, to unblock thread that waits in startWrite
+  itsReadPointer.advanceTo(TimeStamp(0x7FFFFFFFFFFFFFFFLL)); // we only use this TimeStamp for comparison so clockSpeed does not matter
+}
+
+// Counterpart of noMoreReading(): pushes the write pointer to the largest value so that a thread waiting in startRead is released.
+void SynchronizedReaderAndWriter::noMoreWriting()
+{
+  itsWritePointer.advanceTo(TimeStamp(0x7FFFFFFFFFFFFFFFLL));
+}
+
+// maximumNetworkLatency is added to `end` in startRead() before waiting on the wall clock.
+TimeSynchronizedReader::TimeSynchronizedReader(unsigned maximumNetworkLatency)
+:
+  itsMaximumNetworkLatency(maximumNetworkLatency)
+{
+}
+
+
+TimeSynchronizedReader::~TimeSynchronizedReader()
+{
+}
+
+// Calls itsWallClock.waitUntil(end + itsMaximumNetworkLatency); `begin` is ignored.
+void TimeSynchronizedReader::startRead(const TimeStamp & /*begin*/, const TimeStamp &end)
+{
+  itsWallClock.waitUntil(end + itsMaximumNetworkLatency);
+}
+
+// No-op in this implementation.
+void TimeSynchronizedReader::finishedRead(const TimeStamp & /*advanceTo*/)
+{
+}
+
+// No-op in this implementation.
+void TimeSynchronizedReader::startWrite(const TimeStamp & /*begin*/, const TimeStamp & /*end*/)
+{
+}
+
+// No-op in this implementation.
+void TimeSynchronizedReader::finishedWrite(const TimeStamp & /*advanceTo*/)
+{
+}
+
+} // namespace RTCP
+} // namespace LOFAR
diff --git a/RTCP/GPUProc/src/ReaderWriterSynchronization.h b/RTCP/GPUProc/src/ReaderWriterSynchronization.h
new file mode 100644
index 0000000000000000000000000000000000000000..6cb32a2a95acd0cd5687e76c195676acfbdd9b68
--- /dev/null
+++ b/RTCP/GPUProc/src/ReaderWriterSynchronization.h
@@ -0,0 +1,93 @@
+//# Copyright (C) 2007
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: ReaderWriterSynchronization.h 17893 2011-04-29 09:04:10Z romein $
+
+#ifndef LOFAR_GPUPROC_READER_WRITER_SYNCHRONIZATION
+#define LOFAR_GPUPROC_READER_WRITER_SYNCHRONIZATION
+
+//# Never #include <config.h> or #include <lofar_config.h> in a header file!
+
+#include <Interface/RSPTimeStamp.h>
+#include <SlidingPointer.h>
+#include <WallClockTime.h>
+
+#include <pthread.h>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+// Abstract interface: a reader or writer brackets each access to a begin..end TimeStamp range with a start*/finished* pair.
+class ReaderAndWriterSynchronization
+{
+  public:
+    virtual	 ~ReaderAndWriterSynchronization();
+
+    virtual void startRead(const TimeStamp &begin, const TimeStamp &end) = 0;
+    virtual void finishedRead(const TimeStamp &advanceTo) = 0;
+
+    virtual void startWrite(const TimeStamp &begin, const TimeStamp &end) = 0;
+    virtual void finishedWrite(const TimeStamp &advanceTo) = 0;
+};
+
+// Implementation that couples a reader and a writer through two SlidingPointers (read and write position).
+class SynchronizedReaderAndWriter : public ReaderAndWriterSynchronization
+{
+  public:
+		 SynchronizedReaderAndWriter(unsigned bufferSize);
+		 ~SynchronizedReaderAndWriter();
+
+    virtual void startRead(const TimeStamp &begin, const TimeStamp &end);
+    virtual void finishedRead(const TimeStamp &advanceTo);
+
+    virtual void startWrite(const TimeStamp &begin, const TimeStamp &end);
+    virtual void finishedWrite(const TimeStamp &advanceTo);
+
+    void	 noMoreReading(); // releases a thread waiting in startWrite
+    void	 noMoreWriting(); // releases a thread waiting in startRead
+
+  private:
+    SlidingPointer<TimeStamp> itsReadPointer, itsWritePointer;
+    unsigned	 itsBufferSize; // subtracted from `end` in startWrite
+};
+
+// Implementation in which only startRead does work (it waits on a WallClockTime); the other three overrides are empty.
+class TimeSynchronizedReader : public ReaderAndWriterSynchronization
+{
+  public:
+		  TimeSynchronizedReader(unsigned maximumNetworkLatency);
+		  ~TimeSynchronizedReader();
+
+    virtual void  startRead(const TimeStamp &begin, const TimeStamp &end);
+    virtual void  finishedRead(const TimeStamp &advanceTo);
+
+    virtual void  startWrite(const TimeStamp &begin, const TimeStamp &end);
+    virtual void  finishedWrite(const TimeStamp &advanceTo);
+
+  private:
+    WallClockTime itsWallClock;
+    unsigned	  itsMaximumNetworkLatency; // added to `end` in startRead
+};
+
+
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/Scheduling.cc b/RTCP/GPUProc/src/Scheduling.cc
new file mode 100644
index 0000000000000000000000000000000000000000..6541f34e495574c7c4de1a64bd01e2d62d5fe96f
--- /dev/null
+++ b/RTCP/GPUProc/src/Scheduling.cc
@@ -0,0 +1,85 @@
+//# Scheduling.cc:
+//#
+//# Copyright (C) 2008
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: Scheduling.cc 13340 2009-05-28 08:47:40Z mol $
+
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#if defined HAVE_BGP_ION
+
+#include <IONProc/Scheduling.h> // NOTE(review): this file lives in GPUProc but includes the IONProc header -- confirm this is intended
+#include <Common/LofarLogger.h>
+
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <sched.h>
+#include <pthread.h>
+
+namespace LOFAR {
+namespace RTCP {
+// Restricts the caller's CPU affinity to cores 1-3; on failure a warning is logged and execution continues.
+void doNotRunOnCore0()
+{
+  cpu_set_t cpu_set;
+
+  CPU_ZERO(&cpu_set);
+
+  for (unsigned cpu = 1; cpu < 4; cpu ++)
+    CPU_SET(cpu, &cpu_set);
+
+  if (sched_setaffinity(0, sizeof cpu_set, &cpu_set) != 0) {
+    LOG_WARN("sched_setaffinity failed");
+    perror("sched_setaffinity");
+  }
+}
+
+// Restricts the caller's CPU affinity to core 0 only; on failure a warning is logged and execution continues.
+void runOnCore0()
+{
+  cpu_set_t cpu_set;
+
+  CPU_ZERO(&cpu_set);
+  CPU_SET(0, &cpu_set);
+
+  if (sched_setaffinity(0, sizeof cpu_set, &cpu_set) != 0) {
+    LOG_WARN("sched_setaffinity failed");
+    perror("sched_setaffinity");
+  }
+}
+
+// Sets the scheduling policy and priority of the calling thread; a failure is reported on stderr and otherwise ignored.
+void setPriority(unsigned priority)
+{
+  // priority 0: non-real time
+  // priority 1-99: real time
+  struct sched_param sched_param;
+
+  sched_param.sched_priority = priority;
+  // pthread functions return the error number directly (0 on success) and do not set errno
+  if (int error = pthread_setschedparam(pthread_self(), priority ? SCHED_RR : SCHED_OTHER, &sched_param))
+    fprintf(stderr, "pthread_setschedparam: %s\n", strerror(error));
+}
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/Scheduling.h b/RTCP/GPUProc/src/Scheduling.h
new file mode 100644
index 0000000000000000000000000000000000000000..ee42981e5ccaeea7861538252153b6fae55eea7a
--- /dev/null
+++ b/RTCP/GPUProc/src/Scheduling.h
@@ -0,0 +1,44 @@
+//# Scheduling.h
+//#
+//# Copyright (C) 2006
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: Scheduling.h 13340 2009-05-28 08:47:40Z mol $
+
+#ifndef LOFAR_GPUPROC_SCHEDULING_H
+#define LOFAR_GPUPROC_SCHEDULING_H
+
+//# Never #include <config.h> or #include <lofar_config.h> in a header file!
+
+namespace LOFAR {
+namespace RTCP {
+
+#if defined HAVE_BGP_ION
+// Core 0 handles all ethernet and tree interrupts. Do not run time-critical
+// threads on this core.
+extern void doNotRunOnCore0();
+extern void runOnCore0();
+
+// set thread priority. 0 = normal, 1 - 99 = real time
+extern void setPriority(unsigned priority);
+#endif
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/SlidingPointer.h b/RTCP/GPUProc/src/SlidingPointer.h
new file mode 100644
index 0000000000000000000000000000000000000000..6c2b79632190c11de93d5be4dffec1877d027046
--- /dev/null
+++ b/RTCP/GPUProc/src/SlidingPointer.h
@@ -0,0 +1,94 @@
+//# Copyright (C) 2007
+//# ASTRON (Netherlands Foundation for Research in Astronomy)
+//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//# This program is free software; you can redistribute it and/or modify
+//# it under the terms of the GNU General Public License as published by
+//# the Free Software Foundation; either version 2 of the License, or
+//# (at your option) any later version.
+//#
+//# This program is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License
+//# along with this program; if not, write to the Free Software
+//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//#
+//# $Id: SlidingPointer.h 17975 2011-05-10 09:52:51Z mol $
+
+#ifndef LOFAR_GPUPROC_SLIDING_POINTER_H
+#define LOFAR_GPUPROC_SLIDING_POINTER_H
+
+//# Never #include <config.h> or #include <lofar_config.h> in a header file!
+
+#include <Common/Thread/Condition.h>
+#include <Common/Thread/Mutex.h>
+
+
+namespace LOFAR {
+namespace RTCP {
+
+// A value that only moves forward: advanceTo() raises it, waitFor() blocks until it has reached a threshold. A single itsWaitingForValue/itsIsWaiting pair is kept, so presumably at most one thread is expected to wait at a time.
+template <typename T> class SlidingPointer
+{
+  public:
+	 SlidingPointer();          // starts at a value-initialized T
+	 SlidingPointer(const T &); // starts at the given value
+
+    void advanceTo(const T &);      // raise the value (it is never lowered) and signal a waiter whose threshold is reached
+    void waitFor(const T &);        // block until the value is >= the argument
+
+  private:
+    T	      itsValue, itsWaitingForValue;
+    Mutex     itsMutex;
+    Condition itsAwaitedValueReached;
+    bool      itsIsWaiting;
+};
+
+// Value-initialize both T members: for a T without a user-provided default constructor they would otherwise be indeterminate when advanceTo/waitFor first compare against itsValue.
+template <typename T> inline SlidingPointer<T>::SlidingPointer()
+:
+  itsValue(), itsWaitingForValue(), itsIsWaiting(false)
+{
+}
+
+
+template <typename T> inline SlidingPointer<T>::SlidingPointer(const T &value)
+:
+  itsValue(value), itsWaitingForValue(),
+  itsIsWaiting(false)
+{
+}
+
+// Raises the value to `value` if that is larger than the current one; smaller or equal values are ignored.
+template <typename T> inline void SlidingPointer<T>::advanceTo(const T &value)
+{
+  ScopedLock lock(itsMutex);
+
+  if (value > itsValue) {
+    itsValue = value;
+    // only signal when a waiter is registered and its threshold has been reached
+    if (itsIsWaiting && value >= itsWaitingForValue)
+      itsAwaitedValueReached.signal();
+  }
+}
+
+// Returns immediately when the current value is already >= `value`; otherwise registers the threshold and waits on the condition, re-checking after every wake-up.
+template <typename T> inline void SlidingPointer<T>::waitFor(const T &value)
+{
+  ScopedLock lock(itsMutex);
+
+  while (itsValue < value) {
+    itsIsWaiting       = true;
+    itsWaitingForValue = value;
+    itsAwaitedValueReached.wait(itsMutex);
+    itsIsWaiting       = false;
+  }
+}
+
+} // namespace RTCP
+} // namespace LOFAR
+
+#endif
diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl b/RTCP/GPUProc/src/UHEP/BeamFormer.cl
new file mode 100644
index 0000000000000000000000000000000000000000..68879b4c010b1b51801b7afd4c1000eb73d7cf57
--- /dev/null
+++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl
@@ -0,0 +1,782 @@
+#define MAX(A,B) ((A)>(B)?(A):(B))
+#define NR_PASSES MAX((NR_STATIONS + 6) / 16, 1) // gives best results on GTX 680
+#define NR_STATIONS_PER_PASS ((NR_STATIONS + NR_PASSES - 1) / NR_PASSES)
+
+#if NR_STATIONS_PER_PASS > 48
+#error "need more passes to beam form this number of stations"
+#endif
+
+#if NR_BITS_PER_SAMPLE == 8
+typedef char4 SampleType;
+#else
+typedef short4 SampleType;
+#endif
+
+
+typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_SAMPLES_PER_SUBBAND +
NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS]; +typedef __global SampleType (*SamplesType)[NR_STATIONS][NR_SUBBANDS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_SUBBANDS][NR_TABS]; + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *samplesPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + SamplesType samples = (SamplesType) samplesPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint pol = get_local_id(0); + uint tab = get_local_id(1); + uint subband = get_global_id(2); + + float2 sample; + __local union { + float2 samples[NR_STATIONS_PER_PASS][16][NR_POLARIZATIONS]; + float4 samples4[NR_STATIONS_PER_PASS][16]; + } _local; + +#pragma unroll + for (uint first_station = 0; first_station < NR_STATIONS; first_station += NR_STATIONS_PER_PASS) { +#if NR_STATIONS_PER_PASS >= 1 + float2 weight_00; + + if (first_station + 0 < NR_STATIONS) + weight_00 = (*weights)[first_station + 0][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 2 + float2 weight_01; + + if (first_station + 1 < NR_STATIONS) + weight_01 = (*weights)[first_station + 1][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 3 + float2 weight_02; + + if (first_station + 2 < NR_STATIONS) + weight_02 = (*weights)[first_station + 2][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 4 + float2 weight_03; + + if (first_station + 3 < NR_STATIONS) + weight_03 = (*weights)[first_station + 3][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 5 + float2 weight_04; + + if (first_station + 4 < NR_STATIONS) + weight_04 = (*weights)[first_station + 4][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 6 + float2 weight_05; + + if (first_station + 5 < NR_STATIONS) + weight_05 = (*weights)[first_station + 5][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 7 + float2 weight_06; + + if (first_station + 6 < 
NR_STATIONS) + weight_06 = (*weights)[first_station + 6][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 8 + float2 weight_07; + + if (first_station + 7 < NR_STATIONS) + weight_07 = (*weights)[first_station + 7][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 9 + float2 weight_08; + + if (first_station + 8 < NR_STATIONS) + weight_08 = (*weights)[first_station + 8][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 10 + float2 weight_09; + + if (first_station + 9 < NR_STATIONS) + weight_09 = (*weights)[first_station + 9][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 11 + float2 weight_10; + + if (first_station + 10 < NR_STATIONS) + weight_10 = (*weights)[first_station + 10][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 12 + float2 weight_11; + + if (first_station + 11 < NR_STATIONS) + weight_11 = (*weights)[first_station + 11][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 13 + float2 weight_12; + + if (first_station + 12 < NR_STATIONS) + weight_12 = (*weights)[first_station + 12][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 14 + float2 weight_13; + + if (first_station + 13 < NR_STATIONS) + weight_13 = (*weights)[first_station + 13][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 15 + float2 weight_14; + + if (first_station + 14 < NR_STATIONS) + weight_14 = (*weights)[first_station + 14][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 16 + float2 weight_15; + + if (first_station + 15 < NR_STATIONS) + weight_15 = (*weights)[first_station + 15][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 17 + float2 weight_16; + + if (first_station + 16 < NR_STATIONS) + weight_16 = (*weights)[first_station + 16][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 18 + float2 weight_17; + + if (first_station + 17 < NR_STATIONS) + weight_17 = (*weights)[first_station + 17][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 19 + float2 weight_18; + + if (first_station + 18 < NR_STATIONS) + weight_18 = 
(*weights)[first_station + 18][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 20 + float2 weight_19; + + if (first_station + 19 < NR_STATIONS) + weight_19 = (*weights)[first_station + 19][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 21 + float2 weight_20; + + if (first_station + 20 < NR_STATIONS) + weight_20 = (*weights)[first_station + 20][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 22 + float2 weight_21; + + if (first_station + 21 < NR_STATIONS) + weight_21 = (*weights)[first_station + 21][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 23 + float2 weight_22; + + if (first_station + 22 < NR_STATIONS) + weight_22 = (*weights)[first_station + 22][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 24 + float2 weight_23; + + if (first_station + 23 < NR_STATIONS) + weight_23 = (*weights)[first_station + 23][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 25 + float2 weight_24; + + if (first_station + 24 < NR_STATIONS) + weight_24 = (*weights)[first_station + 24][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 26 + float2 weight_25; + + if (first_station + 25 < NR_STATIONS) + weight_25 = (*weights)[first_station + 25][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 27 + float2 weight_26; + + if (first_station + 26 < NR_STATIONS) + weight_26 = (*weights)[first_station + 26][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 28 + float2 weight_27; + + if (first_station + 27 < NR_STATIONS) + weight_27 = (*weights)[first_station + 27][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 29 + float2 weight_28; + + if (first_station + 28 < NR_STATIONS) + weight_28 = (*weights)[first_station + 28][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 30 + float2 weight_29; + + if (first_station + 29 < NR_STATIONS) + weight_29 = (*weights)[first_station + 29][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 31 + float2 weight_30; + + if (first_station + 30 < NR_STATIONS) + weight_30 = (*weights)[first_station + 
30][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 32 + float2 weight_31; + + if (first_station + 31 < NR_STATIONS) + weight_31 = (*weights)[first_station + 31][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 33 + float2 weight_32; + + if (first_station + 32 < NR_STATIONS) + weight_32 = (*weights)[first_station + 32][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 34 + float2 weight_33; + + if (first_station + 33 < NR_STATIONS) + weight_33 = (*weights)[first_station + 33][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 35 + float2 weight_34; + + if (first_station + 34 < NR_STATIONS) + weight_34 = (*weights)[first_station + 34][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 36 + float2 weight_35; + + if (first_station + 35 < NR_STATIONS) + weight_35 = (*weights)[first_station + 35][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 37 + float2 weight_36; + + if (first_station + 36 < NR_STATIONS) + weight_36 = (*weights)[first_station + 36][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 38 + float2 weight_37; + + if (first_station + 37 < NR_STATIONS) + weight_37 = (*weights)[first_station + 37][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 39 + float2 weight_38; + + if (first_station + 38 < NR_STATIONS) + weight_38 = (*weights)[first_station + 38][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 40 + float2 weight_39; + + if (first_station + 39 < NR_STATIONS) + weight_39 = (*weights)[first_station + 39][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 41 + float2 weight_40; + + if (first_station + 40 < NR_STATIONS) + weight_40 = (*weights)[first_station + 40][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 42 + float2 weight_41; + + if (first_station + 41 < NR_STATIONS) + weight_41 = (*weights)[first_station + 41][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 43 + float2 weight_42; + + if (first_station + 42 < NR_STATIONS) + weight_42 = (*weights)[first_station + 42][subband][tab]; +#endif + 
+#if NR_STATIONS_PER_PASS >= 44 + float2 weight_43; + + if (first_station + 43 < NR_STATIONS) + weight_43 = (*weights)[first_station + 43][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 45 + float2 weight_44; + + if (first_station + 44 < NR_STATIONS) + weight_44 = (*weights)[first_station + 44][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 46 + float2 weight_45; + + if (first_station + 45 < NR_STATIONS) + weight_45 = (*weights)[first_station + 45][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 47 + float2 weight_46; + + if (first_station + 46 < NR_STATIONS) + weight_46 = (*weights)[first_station + 46][subband][tab]; +#endif + +#if NR_STATIONS_PER_PASS >= 48 + float2 weight_47; + + if (first_station + 47 < NR_STATIONS) + weight_47 = (*weights)[first_station + 47][subband][tab]; +#endif + + for (uint time = 0; time < NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1; time += 16) { + for (uint i = get_local_id(0) + NR_POLARIZATIONS * get_local_id(1); i < NR_STATIONS_PER_PASS * 16; i += NR_TABS * NR_POLARIZATIONS) { + uint t = i % 16; + uint s = i / 16; + + if (time + t < NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1) + if (NR_STATIONS % NR_STATIONS_PER_PASS == 0 || first_station + s < NR_STATIONS) + _local.samples4[0][i] = convert_float4((*samples)[first_station + s][subband][time + t]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (uint t = 0; t < min(16U, (NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1 - time)); t ++) { + float2 sum = first_station == 0 ? 
0 : (*complexVoltages)[subband][time + t][tab][pol]; + +#if NR_STATIONS_PER_PASS >= 1 + if (first_station + 1 < NR_STATIONS) { + sample = _local.samples[ 0][t][pol]; + sum += weight_00.xx * sample; + sum += weight_00.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 2 + if (first_station + 2 < NR_STATIONS) { + sample = _local.samples[ 1][t][pol]; + sum += weight_01.xx * sample; + sum += weight_01.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 3 + if (first_station + 3 < NR_STATIONS) { + sample = _local.samples[ 2][t][pol]; + sum += weight_02.xx * sample; + sum += weight_02.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 4 + if (first_station + 4 < NR_STATIONS) { + sample = _local.samples[ 3][t][pol]; + sum += weight_03.xx * sample; + sum += weight_03.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 5 + if (first_station + 5 < NR_STATIONS) { + sample = _local.samples[ 4][t][pol]; + sum += weight_04.xx * sample; + sum += weight_04.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 6 + if (first_station + 6 < NR_STATIONS) { + sample = _local.samples[ 5][t][pol]; + sum += weight_05.xx * sample; + sum += weight_05.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 7 + if (first_station + 7 < NR_STATIONS) { + sample = _local.samples[ 6][t][pol]; + sum += weight_06.xx * sample; + sum += weight_06.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 8 + if (first_station + 8 < NR_STATIONS) { + sample = _local.samples[ 7][t][pol]; + sum += weight_07.xx * sample; + sum += weight_07.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 9 + if (first_station + 9 < NR_STATIONS) { + sample = _local.samples[ 8][t][pol]; + sum += weight_08.xx * sample; + sum += weight_08.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS 
>= 10 + if (first_station + 10 < NR_STATIONS) { + sample = _local.samples[ 9][t][pol]; + sum += weight_09.xx * sample; + sum += weight_09.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 11 + if (first_station + 11 < NR_STATIONS) { + sample = _local.samples[10][t][pol]; + sum += weight_10.xx * sample; + sum += weight_10.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 12 + if (first_station + 12 < NR_STATIONS) { + sample = _local.samples[11][t][pol]; + sum += weight_11.xx * sample; + sum += weight_11.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 13 + if (first_station + 13 < NR_STATIONS) { + sample = _local.samples[12][t][pol]; + sum += weight_12.xx * sample; + sum += weight_12.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 14 + if (first_station + 14 < NR_STATIONS) { + sample = _local.samples[13][t][pol]; + sum += weight_13.xx * sample; + sum += weight_13.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 15 + if (first_station + 15 < NR_STATIONS) { + sample = _local.samples[14][t][pol]; + sum += weight_14.xx * sample; + sum += weight_14.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 16 + if (first_station + 15 < NR_STATIONS) { + sample = _local.samples[15][t][pol]; + sum += weight_15.xx * sample; + sum += weight_15.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 17 + if (first_station + 16 < NR_STATIONS) { + sample = _local.samples[16][t][pol]; + sum += weight_16.xx * sample; + sum += weight_16.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 18 + if (first_station + 17 < NR_STATIONS) { + sample = _local.samples[17][t][pol]; + sum += weight_17.xx * sample; + sum += weight_17.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 19 + if (first_station + 18 < NR_STATIONS) { + sample = 
_local.samples[18][t][pol]; + sum += weight_18.xx * sample; + sum += weight_18.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 20 + if (first_station + 19 < NR_STATIONS) { + sample = _local.samples[19][t][pol]; + sum += weight_19.xx * sample; + sum += weight_19.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 21 + if (first_station + 20 < NR_STATIONS) { + sample = _local.samples[20][t][pol]; + sum += weight_20.xx * sample; + sum += weight_20.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 22 + if (first_station + 21 < NR_STATIONS) { + sample = _local.samples[21][t][pol]; + sum += weight_21.xx * sample; + sum += weight_21.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 23 + if (first_station + 22 < NR_STATIONS) { + sample = _local.samples[22][t][pol]; + sum += weight_22.xx * sample; + sum += weight_22.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 24 + if (first_station + 23 < NR_STATIONS) { + sample = _local.samples[23][t][pol]; + sum += weight_23.xx * sample; + sum += weight_23.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 25 + if (first_station + 25 < NR_STATIONS) { + sample = _local.samples[24][t][pol]; + sum += weight_24.xx * sample; + sum += weight_24.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 26 + if (first_station + 25 < NR_STATIONS) { + sample = _local.samples[25][t][pol]; + sum += weight_25.xx * sample; + sum += weight_25.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 27 + if (first_station + 26 < NR_STATIONS) { + sample = _local.samples[26][t][pol]; + sum += weight_26.xx * sample; + sum += weight_26.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 28 + if (first_station + 27 < NR_STATIONS) { + sample = _local.samples[27][t][pol]; + sum += weight_27.xx * sample; + 
sum += weight_27.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 29 + if (first_station + 28 < NR_STATIONS) { + sample = _local.samples[28][t][pol]; + sum += weight_28.xx * sample; + sum += weight_28.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 30 + if (first_station + 29 < NR_STATIONS) { + sample = _local.samples[29][t][pol]; + sum += weight_29.xx * sample; + sum += weight_29.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 31 + if (first_station + 30 < NR_STATIONS) { + sample = _local.samples[30][t][pol]; + sum += weight_30.xx * sample; + sum += weight_30.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 32 + if (first_station + 31 < NR_STATIONS) { + sample = _local.samples[31][t][pol]; + sum += weight_31.xx * sample; + sum += weight_31.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 33 + if (first_station + 32 < NR_STATIONS) { + sample = _local.samples[32][t][pol]; + sum += weight_32.xx * sample; + sum += weight_32.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 34 + if (first_station + 33 < NR_STATIONS) { + sample = _local.samples[33][t][pol]; + sum += weight_33.xx * sample; + sum += weight_33.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 35 + if (first_station + 34 < NR_STATIONS) { + sample = _local.samples[34][t][pol]; + sum += weight_34.xx * sample; + sum += weight_34.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 36 + if (first_station + 35 < NR_STATIONS) { + sample = _local.samples[35][t][pol]; + sum += weight_35.xx * sample; + sum += weight_35.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 37 + if (first_station + 36 < NR_STATIONS) { + sample = _local.samples[36][t][pol]; + sum += weight_36.xx * sample; + sum += weight_36.yy * (float2) (-sample.y, sample.x); + } 
+#endif + +#if NR_STATIONS_PER_PASS >= 38 + if (first_station + 37 < NR_STATIONS) { + sample = _local.samples[37][t][pol]; + sum += weight_37.xx * sample; + sum += weight_37.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 39 + if (first_station + 38 < NR_STATIONS) { + sample = _local.samples[38][t][pol]; + sum += weight_38.xx * sample; + sum += weight_38.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 40 + if (first_station + 39 < NR_STATIONS) { + sample = _local.samples[39][t][pol]; + sum += weight_39.xx * sample; + sum += weight_39.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 41 + if (first_station + 40 < NR_STATIONS) { + sample = _local.samples[40][t][pol]; + sum += weight_40.xx * sample; + sum += weight_40.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 42 + if (first_station + 41 < NR_STATIONS) { + sample = _local.samples[41][t][pol]; + sum += weight_41.xx * sample; + sum += weight_41.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 43 + if (first_station + 42 < NR_STATIONS) { + sample = _local.samples[42][t][pol]; + sum += weight_42.xx * sample; + sum += weight_42.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 44 + if (first_station + 43 < NR_STATIONS) { + sample = _local.samples[43][t][pol]; + sum += weight_43.xx * sample; + sum += weight_43.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 45 + if (first_station + 44 < NR_STATIONS) { + sample = _local.samples[44][t][pol]; + sum += weight_44.xx * sample; + sum += weight_44.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 46 + if (first_station + 45 < NR_STATIONS) { + sample = _local.samples[45][t][pol]; + sum += weight_45.xx * sample; + sum += weight_45.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 47 + if (first_station + 
46 < NR_STATIONS) { + sample = _local.samples[46][t][pol]; + sum += weight_46.xx * sample; + sum += weight_46.yy * (float2) (-sample.y, sample.x); + } +#endif + +#if NR_STATIONS_PER_PASS >= 48 + if (first_station + 47 < NR_STATIONS) { + sample = _local.samples[47][t][pol]; + sum += weight_47.xx * sample; + sum += weight_47.yy * (float2) (-sample.y, sample.x); + } +#endif + + (*complexVoltages)[subband][time + t][tab][pol] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + } +} diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl-0.ptx b/RTCP/GPUProc/src/UHEP/BeamFormer.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..ea220810bb76a4961820e5b0583dc8ffe90325aa --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl-0.ptx @@ -0,0 +1,207 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Tue Feb 7 07:15:59 2012 (1328595359) +// Driver 295.20 +// + +.version 3.0 +.target sm_21, texmode_independent +.address_size 32 + +.extern .shared .align 16 .b8 shr_2__local[2048]; + +.entry complexVoltages( + .param .u32 .ptr .global .align 1 complexVoltages_param_0, + .param .u32 .ptr .global .align 1 complexVoltages_param_1, + .param .u32 .ptr .global .align 1 complexVoltages_param_2 +) +{ + .reg .f32 %f<173>; + .reg .pred %p<7>; + .reg .s32 %r<76>; + .reg .s16 %rc<16>; + + + ld.param.u32 %r29, [complexVoltages_param_2]; + // inline asm + mov.u32 %r22, %tid.x; + // inline asm + // inline asm + mov.u32 %r23, %tid.y; + // inline asm + // inline asm + mov.u32 %r24, %envreg5; + // inline asm + // inline asm + mov.u32 %r25, %ntid.z; + // inline asm + // inline asm + mov.u32 %r26, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r27, %tid.z; + // inline asm + add.s32 %r30, %r27, %r24; + mad.lo.s32 %r4, %r26, %r25, %r30; + shl.b32 %r31, %r23, 4; + mad.lo.s32 %r32, %r4, 66496, %r31; + shl.b32 %r33, %r22, 3; + add.s32 %r5, %r32, %r33; + shl.b32 %r34, %r4, 5; + shl.b32 %r35, %r23, 3; + add.s32 %r36, %r34, %r35; + add.s32 %r37, %r29, %r36; + 
ld.global.v2.f32 {%f131, %f132}, [%r37]; + ld.global.v2.f32 {%f137, %f138}, [%r37+15616]; + ld.global.v2.f32 {%f143, %f144}, [%r37+31232]; + ld.global.v2.f32 {%f149, %f150}, [%r37+46848]; + ld.global.v2.f32 {%f155, %f156}, [%r37+62464]; + ld.global.v2.f32 {%f161, %f162}, [%r37+78080]; + ld.global.v2.f32 {%f167, %f168}, [%r37+93696]; + mov.u32 %r72, 0; + +BB0_1: + shl.b32 %r40, %r72, 10; + add.s32 %r7, %r5, %r40; + // inline asm + mov.u32 %r38, %tid.x; + // inline asm + // inline asm + mov.u32 %r39, %tid.y; + // inline asm + shl.b32 %r9, %r39, 1; + add.s32 %r74, %r9, %r38; + setp.gt.u32 %p1, %r74, 127; + @%p1 bra BB0_6; + + add.s32 %r11, %r38, %r9; + cvt.u8.u32 %rc1, %r11; + mov.u16 %rc15, 0; + mov.u32 %r73, 0; + +BB0_3: + add.s32 %r14, %r11, %r73; + shl.b32 %r42, %r72, 4; + shl.b16 %rc5, %rc15, 3; + add.s16 %rc6, %rc1, %rc5; + cvt.u32.u8 %r43, %rc6; + and.b32 %r44, %r43, 15; + add.s32 %r15, %r42, %r44; + setp.gt.u32 %p2, %r15, 1038; + @%p2 bra BB0_5; + + shr.u32 %r45, %r14, 4; + ld.param.u32 %r71, [complexVoltages_param_1]; + mad.lo.s32 %r46, %r45, 2028128, %r71; + mad.lo.s32 %r47, %r4, 4156, %r46; + shl.b32 %r48, %r15, 2; + add.s32 %r49, %r47, %r48; + ld.global.v4.u8 {%rc11, %rc12, %rc13, %rc14}, [%r49]; + // inline asm + cvt.rn.f32.s8 %f1, %rc11; + // inline asm + // inline asm + cvt.rn.f32.s8 %f2, %rc12; + // inline asm + // inline asm + cvt.rn.f32.s8 %f3, %rc13; + // inline asm + // inline asm + cvt.rn.f32.s8 %f4, %rc14; + // inline asm + shl.b32 %r50, %r74, 4; + mov.u32 %r51, shr_2__local; + add.s32 %r52, %r51, %r50; + st.shared.v4.f32 [%r52], {%f1, %f2, %f3, %f4}; + +BB0_5: + add.s32 %r74, %r74, 8; + setp.lt.u32 %p3, %r74, 128; + add.s32 %r73, %r73, 8; + add.s16 %rc15, %rc15, 1; + @%p3 bra BB0_3; + +BB0_6: + bar.sync 0; + mov.u32 %r56, 1039; + shl.b32 %r57, %r72, 4; + sub.s32 %r18, %r56, %r57; + mov.u32 %r54, 16; + // inline asm + min.u32 %r53, %r54, %r18; + // inline asm + setp.eq.s32 %p4, %r53, 0; + @%p4 bra BB0_9; + + mov.u32 %r75, 0; + +BB0_8: + shl.b32 
%r62, %r75, 6; + add.s32 %r63, %r7, %r62; + ld.param.u32 %r70, [complexVoltages_param_0]; + add.s32 %r64, %r70, %r63; + shl.b32 %r65, %r75, 4; + mov.u32 %r66, shr_2__local; + add.s32 %r67, %r66, %r65; + add.s32 %r69, %r67, %r33; + ld.shared.v2.f32 {%f27, %f28}, [%r69]; + mov.f32 %f5, 0f00000000; + fma.rn.ftz.f32 %f31, %f131, %f27, %f5; + fma.rn.ftz.f32 %f32, %f131, %f28, %f5; + neg.ftz.f32 %f7, %f28; + fma.rn.ftz.f32 %f39, %f132, %f7, %f31; + fma.rn.ftz.f32 %f40, %f132, %f27, %f32; + ld.shared.v2.f32 {%f43, %f44}, [%r69+256]; + fma.rn.ftz.f32 %f45, %f137, %f43, %f39; + fma.rn.ftz.f32 %f46, %f137, %f44, %f40; + neg.ftz.f32 %f10, %f44; + fma.rn.ftz.f32 %f53, %f138, %f10, %f45; + fma.rn.ftz.f32 %f54, %f138, %f43, %f46; + ld.shared.v2.f32 {%f57, %f58}, [%r69+512]; + fma.rn.ftz.f32 %f59, %f143, %f57, %f53; + fma.rn.ftz.f32 %f60, %f143, %f58, %f54; + neg.ftz.f32 %f13, %f58; + fma.rn.ftz.f32 %f67, %f144, %f13, %f59; + fma.rn.ftz.f32 %f68, %f144, %f57, %f60; + ld.shared.v2.f32 {%f71, %f72}, [%r69+768]; + fma.rn.ftz.f32 %f73, %f149, %f71, %f67; + fma.rn.ftz.f32 %f74, %f149, %f72, %f68; + neg.ftz.f32 %f16, %f72; + fma.rn.ftz.f32 %f81, %f150, %f16, %f73; + fma.rn.ftz.f32 %f82, %f150, %f71, %f74; + ld.shared.v2.f32 {%f85, %f86}, [%r69+1024]; + fma.rn.ftz.f32 %f87, %f155, %f85, %f81; + fma.rn.ftz.f32 %f88, %f155, %f86, %f82; + neg.ftz.f32 %f19, %f86; + fma.rn.ftz.f32 %f95, %f156, %f19, %f87; + fma.rn.ftz.f32 %f96, %f156, %f85, %f88; + ld.shared.v2.f32 {%f99, %f100}, [%r69+1280]; + fma.rn.ftz.f32 %f101, %f161, %f99, %f95; + fma.rn.ftz.f32 %f102, %f161, %f100, %f96; + neg.ftz.f32 %f22, %f100; + fma.rn.ftz.f32 %f109, %f162, %f22, %f101; + fma.rn.ftz.f32 %f110, %f162, %f99, %f102; + ld.shared.v2.f32 {%f113, %f114}, [%r69+1536]; + fma.rn.ftz.f32 %f115, %f167, %f113, %f109; + fma.rn.ftz.f32 %f116, %f167, %f114, %f110; + neg.ftz.f32 %f25, %f114; + fma.rn.ftz.f32 %f123, %f168, %f25, %f115; + fma.rn.ftz.f32 %f124, %f168, %f113, %f116; + st.global.v2.f32 [%r64], {%f123, %f124}; + // 
inline asm + min.u32 %r59, %r54, %r18; + // inline asm + add.s32 %r75, %r75, 1; + setp.lt.u32 %p5, %r75, %r59; + @%p5 bra BB0_8; + +BB0_9: + bar.sync 0; + add.s32 %r72, %r72, 1; + setp.ne.s32 %p6, %r72, 65; + @%p6 bra BB0_1; + + ret; +} + + diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl.4groups b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.4groups new file mode 100644 index 0000000000000000000000000000000000000000..d6f2e81bf035e401d11f645d14df6a99e3eb7321 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.4groups @@ -0,0 +1,225 @@
+// Beam-former kernel variant that divides the stations over
+// NR_THREADS_PER_GROUP work-items ("groups") per (tab, polarization) pair and
+// combines the per-group partial sums through __local memory.
+//
+// The build must supply NR_STATIONS, NR_SUBBANDS, NR_TABS, NR_POLARIZATIONS,
+// NR_TIMES_PER_BLOCK, NR_STATION_FILTER_TAPS and NR_BITS_PER_SAMPLE as
+// preprocessor definitions.
+
+#define NR_THREADS_PER_GROUP 4
+#define NR_STATIONS_PER_GROUP ((NR_STATIONS + NR_THREADS_PER_GROUP - 1) / NR_THREADS_PER_GROUP)
+
+#if NR_BITS_PER_SAMPLE == 8
+typedef char2 SampleType;
+#else
+typedef short2 SampleType;
+#endif
+
+#if NR_STATIONS > 48
+#error unsupported number of stations (max = 48)
+#endif
+
+
+typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS];
+typedef __global SampleType (*SamplesType)[NR_STATIONS][NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS];
+typedef __global float2 (*WeightsType)[NR_STATIONS][NR_SUBBANDS][NR_TABS];
+
+
+// True when station (first_station + k) exists. NR_STATIONS_PER_GROUP is
+// rounded up, so the trailing group(s) can extend past NR_STATIONS whenever
+// NR_STATIONS is not a multiple of NR_THREADS_PER_GROUP. When it is a
+// multiple, the first operand is a compile-time constant and the run-time
+// comparison folds away.
+#define STATION_IN_RANGE(k) \
+  (NR_STATIONS % NR_THREADS_PER_GROUP == 0 || first_station + (k) < NR_STATIONS)
+
+// Declares the private copy of the weight of the k-th station of this group.
+// A non-existent station gets a zero weight and the weights buffer is not read.
+#define DECLARE_WEIGHT(name, k) \
+  float2 name = (float2) (0, 0); \
+  if (STATION_IN_RANGE(k)) \
+    name = (*weights)[first_station + (k)][subband][tab]
+
+// Adds weight * sample (complex multiplication) of the k-th station of this
+// group to `sum`; non-existent stations are skipped so that local_samples is
+// never indexed past the stations that were actually loaded.
+#define ADD_STATION(weight, k) \
+  if (STATION_IN_RANGE(k)) { \
+    sample = local_samples[t][first_station + (k)][pol]; \
+    sum += (weight).xx * sample; \
+    sum += (weight).yy * (float2) (-sample.y, sample.x); \
+  }
+
+
+// complexVoltagesPtr: output, indexed [subband][time][tab][pol] (float2).
+// samplesPtr:         input,  indexed [station][subband][time][pol] (SampleType).
+// weightsPtr:         input,  indexed [station][subband][tab] (float2).
+//
+// Work-item mapping: get_local_id(0) = 2 * group + pol, get_local_id(1) = tab,
+// get_global_id(2) = subband.
+// NOTE(review): the sample loader strides by NR_TABS * NR_THREADS_PER_GROUP *
+// NR_POLARIZATIONS, which suggests a local size of (8, NR_TABS, 1) -- confirm
+// against the host code.
+__kernel void complexVoltages(__global void *complexVoltagesPtr,
+                              __global const void *samplesPtr,
+                              __global const void *weightsPtr)
+{
+  ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr;
+  SamplesType samples = (SamplesType) samplesPtr;
+  WeightsType weights = (WeightsType) weightsPtr;
+
+  uint pol = get_local_id(0) & 1;
+  uint group = get_local_id(0) >> 1;
+  uint tab = get_local_id(1);
+  uint subband = get_global_id(2);
+  uint first_station = group * NR_STATIONS_PER_GROUP;
+
+  float2 sample;
+  // 16 time samples of every station, converted to float.
+  // NOTE(review): the "+ 1" on the station dimension looks like padding;
+  // its purpose is not visible here.
+  __local float2 local_samples[16][NR_STATIONS + 1][NR_POLARIZATIONS];
+  // One private slot for the partial sum of each of the groups 1..3.
+  __local float2 local_sum[NR_TABS][NR_THREADS_PER_GROUP - 1][NR_POLARIZATIONS];
+
+#if NR_STATIONS_PER_GROUP >= 1
+  DECLARE_WEIGHT(weight_00, 0);
+#endif
+#if NR_STATIONS_PER_GROUP >= 2
+  DECLARE_WEIGHT(weight_01, 1);
+#endif
+#if NR_STATIONS_PER_GROUP >= 3
+  DECLARE_WEIGHT(weight_02, 2);
+#endif
+#if NR_STATIONS_PER_GROUP >= 4
+  DECLARE_WEIGHT(weight_03, 3);
+#endif
+#if NR_STATIONS_PER_GROUP >= 5
+  DECLARE_WEIGHT(weight_04, 4);
+#endif
+#if NR_STATIONS_PER_GROUP >= 6
+  DECLARE_WEIGHT(weight_05, 5);
+#endif
+#if NR_STATIONS_PER_GROUP >= 7
+  DECLARE_WEIGHT(weight_06, 6);
+#endif
+#if NR_STATIONS_PER_GROUP >= 8
+  DECLARE_WEIGHT(weight_07, 7);
+#endif
+#if NR_STATIONS_PER_GROUP >= 9
+  DECLARE_WEIGHT(weight_08, 8);
+#endif
+#if NR_STATIONS_PER_GROUP >= 10
+  DECLARE_WEIGHT(weight_09, 9);
+#endif
+#if NR_STATIONS_PER_GROUP >= 11
+  DECLARE_WEIGHT(weight_10, 10);
+#endif
+#if NR_STATIONS_PER_GROUP >= 12
+  DECLARE_WEIGHT(weight_11, 11);
+#endif
+#if NR_STATIONS_PER_GROUP >= 13
+  DECLARE_WEIGHT(weight_12, 12);
+#endif
+#if NR_STATIONS_PER_GROUP >= 14
+  DECLARE_WEIGHT(weight_13, 13);
+#endif
+#if NR_STATIONS_PER_GROUP >= 15
+  DECLARE_WEIGHT(weight_14, 14);
+#endif
+#if NR_STATIONS_PER_GROUP >= 16
+  DECLARE_WEIGHT(weight_15, 15);
+#endif
+
+  for (uint time = 0; time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1; time += 16) {
+    // Cooperatively stage up to 16 time samples of all stations in local memory.
+    for (uint i = get_local_id(0) + NR_THREADS_PER_GROUP * NR_POLARIZATIONS * get_local_id(1); i < NR_STATIONS * 16 * NR_POLARIZATIONS; i += NR_TABS * NR_THREADS_PER_GROUP * NR_POLARIZATIONS) {
+      uint p = i % NR_POLARIZATIONS;
+      uint t = (i / NR_POLARIZATIONS) % 16;
+      uint s = i / NR_POLARIZATIONS / 16;
+
+      if (time + t < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1)
+        local_samples[t][s][p] = convert_float2((*samples)[s][subband][time + t][p]);
+    }
+
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    // The trip count depends on `time` only, so it is identical for every
+    // work-item and the barriers inside this loop are reached by all of them.
+    for (uint t = 0; t < min(16U, (NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1 - time)); t ++) {
+      float2 sum = (float2) (0, 0);
+
+#if NR_STATIONS_PER_GROUP >= 1
+      ADD_STATION(weight_00, 0)
+#endif
+#if NR_STATIONS_PER_GROUP >= 2
+      ADD_STATION(weight_01, 1)
+#endif
+#if NR_STATIONS_PER_GROUP >= 3
+      ADD_STATION(weight_02, 2)
+#endif
+#if NR_STATIONS_PER_GROUP >= 4
+      ADD_STATION(weight_03, 3)
+#endif
+#if NR_STATIONS_PER_GROUP >= 5
+      ADD_STATION(weight_04, 4)
+#endif
+#if NR_STATIONS_PER_GROUP >= 6
+      ADD_STATION(weight_05, 5)
+#endif
+#if NR_STATIONS_PER_GROUP >= 7
+      ADD_STATION(weight_06, 6)
+#endif
+#if NR_STATIONS_PER_GROUP >= 8
+      ADD_STATION(weight_07, 7)
+#endif
+#if NR_STATIONS_PER_GROUP >= 9
+      ADD_STATION(weight_08, 8)
+#endif
+#if NR_STATIONS_PER_GROUP >= 10
+      ADD_STATION(weight_09, 9)
+#endif
+#if NR_STATIONS_PER_GROUP >= 11
+      ADD_STATION(weight_10, 10)
+#endif
+#if NR_STATIONS_PER_GROUP >= 12
+      ADD_STATION(weight_11, 11)
+#endif
+#if NR_STATIONS_PER_GROUP >= 13
+      ADD_STATION(weight_12, 12)
+#endif
+#if NR_STATIONS_PER_GROUP >= 14
+      ADD_STATION(weight_13, 13)
+#endif
+#if NR_STATIONS_PER_GROUP >= 15
+      ADD_STATION(weight_14, 14)
+#endif
+#if NR_STATIONS_PER_GROUP >= 16
+      ADD_STATION(weight_15, 15)
+#endif
+
+#if 1
+      // Reduce the four partial sums. Groups 1..3 publish their value in a
+      // slot of their own; the barrier makes those writes visible before
+      // group 0 reads them. Local memory is only guaranteed to be consistent
+      // between work-items at a work-group barrier, so this does not depend
+      // on the groups executing in lock-step.
+      if (group != 0)
+        local_sum[tab][group - 1][pol] = sum;
+
+      barrier(CLK_LOCAL_MEM_FENCE);
+
+      // Pairwise association: (g0 + g1) + (g2 + g3).
+      if (group == 0)
+        (*complexVoltages)[subband][time + t][tab][pol] = (sum + local_sum[tab][0][pol]) + (local_sum[tab][1][pol] + local_sum[tab][2][pol]);
+
+      // Keep the next iteration from overwriting slots that group 0 may still
+      // be reading.
+      barrier(CLK_LOCAL_MEM_FENCE);
+#else
+      // Disabled alternative that exchanges the partial sums with PTX shuffle
+      // instructions instead of local memory.
+      float2 other_sum;
+      asm("shfl.down.b32 %0, %1, 1, 0x1F;" : "=r" (other_sum.x) : "r" (sum.x));
+      asm("shfl.down.b32 %0, %1, 1, 0x1F;" : "=r" (other_sum.y) : "r" (sum.y));
+      sum += other_sum;
+      asm("shfl.down.b32 %0, %1, 2, 0x1F;" : "=r" (other_sum.x) : "r" (sum.x));
+      asm("shfl.down.b32 %0, %1, 2, 0x1F;" : "=r" (other_sum.y) : "r" (sum.y));
+
+      if (first_station == 0)
+        (*complexVoltages)[subband][time + t][tab][pol] = sum + other_sum;
+#endif
+ } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop new file mode 100644 index 0000000000000000000000000000000000000000..e68c6a9a8d90bbe05ba629baafee30e102b8fcbb --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop @@ -0,0 +1,177 @@
+// Beam-former kernel variant in which every work-item handles 6 stations and
+// 3 tabs for one polarization, and the running sums "hop" from one station
+// group to the next through __local memory, one time sample per iteration.
+//
+// The build must supply NR_STATIONS, NR_SUBBANDS, NR_TABS, NR_POLARIZATIONS,
+// NR_TIMES_PER_BLOCK, NR_STATION_FILTER_TAPS and NR_BITS_PER_SAMPLE as
+// preprocessor definitions.
+
+// The shuffle-based path is forced off: it uses `pol` and `station_group`,
+// which are only declared in the non-shuffle branch, and it accesses .z/.w
+// components of float2 values.
+#undef USE_WARP_SHUFFLE_INSN // FIXME: Broken
+
+#if NR_BITS_PER_SAMPLE == 8
+typedef char2 SampleType;
+#else
+typedef short2 SampleType;
+#endif
+
+// Every work-item processes exactly 6 stations and 3 tabs. With any other
+// multiple no station group satisfies first_station + 6 == NR_STATIONS, so no
+// output is ever written, and local_sums is indexed out of range.
+#if NR_STATIONS % 6 != 0
+#error unsupported number of stations (must be a multiple of 6)
+#endif
+
+#if NR_TABS % 3 != 0
+#error unsupported number of tabs (must be a multiple of 3)
+#endif
+
+
+typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS];
+typedef __global SampleType (*SamplesType)[NR_STATIONS][NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS];
+typedef __global float2 (*WeightsType)[NR_STATIONS][NR_SUBBANDS][NR_TABS];
+
+// Complex product a * b, with .x the real and .y the imaginary part.
+float2 cmul(float2 a, float2 b)
+{
+  return (float2) (a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x);
+}
+
+
+// complexVoltagesPtr: output, indexed [subband][time][tab][pol] (float2).
+// samplesPtr:         input,  indexed [station][subband][time][pol] (SampleType).
+// weightsPtr:         input,  indexed [station][subband][tab] (float2).
+//
+// Work-item mapping (non-shuffle path): bit 0 of get_local_id(0) selects the
+// polarization; the remaining bits enumerate (station_group, tab_group) with
+// tab_group varying fastest. get_global_id(1) selects the subband.
+//
+// Station group g runs g time steps behind group 0. In every iteration a
+// group picks up the sums that its predecessor produced for the same time
+// sample, adds its own 6 stations, and either hands the result to the next
+// group or, for the last group, writes it to global memory.
+__kernel void complexVoltages(__global void *complexVoltagesPtr,
+                              __global const void *samplesPtr,
+                              __global const void *weightsPtr)
+{
+  ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr;
+  SamplesType samples = (SamplesType) samplesPtr;
+  WeightsType weights = (WeightsType) weightsPtr;
+
+#if defined USE_WARP_SHUFFLE_INSN
+  uint first_tab = 3 * get_local_id(1);
+  uint first_station = 6 * get_local_id(0);
+#else
+  uint pol = get_local_id(0) & 1;
+  uint tab_group = (get_local_id(0) >> 1) % (NR_TABS / 3);
+  uint station_group = (get_local_id(0) >> 1) / (NR_TABS / 3);
+  uint first_tab = 3 * tab_group;
+  uint first_station = 6 * station_group;
+#endif
+  uint subband = get_global_id(1);
+
+  bool lastGroupOfStations = first_station + 6 == NR_STATIONS;
+
+  // weight_<s>_<b>: weight of station (first_station + s) for tab (first_tab + b).
+  float2 weight_0_0 = (*weights)[first_station + 0][subband][first_tab + 0];
+  float2 weight_0_1 = (*weights)[first_station + 0][subband][first_tab + 1];
+  float2 weight_0_2 = (*weights)[first_station + 0][subband][first_tab + 2];
+  float2 weight_1_0 = (*weights)[first_station + 1][subband][first_tab + 0];
+  float2 weight_1_1 = (*weights)[first_station + 1][subband][first_tab + 1];
+  float2 weight_1_2 = (*weights)[first_station + 1][subband][first_tab + 2];
+  float2 weight_2_0 = (*weights)[first_station + 2][subband][first_tab + 0];
+  float2 weight_2_1 = (*weights)[first_station + 2][subband][first_tab + 1];
+  float2 weight_2_2 = (*weights)[first_station + 2][subband][first_tab + 2];
+  float2 weight_3_0 = (*weights)[first_station + 3][subband][first_tab + 0];
+  float2 weight_3_1 = (*weights)[first_station + 3][subband][first_tab + 1];
+  float2 weight_3_2 = (*weights)[first_station + 3][subband][first_tab + 2];
+  float2 weight_4_0 = (*weights)[first_station + 4][subband][first_tab + 0];
+  float2 weight_4_1 = (*weights)[first_station + 4][subband][first_tab + 1];
+  float2 weight_4_2 = (*weights)[first_station + 4][subband][first_tab + 2];
+  float2 weight_5_0 = (*weights)[first_station + 5][subband][first_tab + 0];
+  float2 weight_5_1 = (*weights)[first_station + 5][subband][first_tab + 1];
+  float2 weight_5_2 = (*weights)[first_station + 5][subband][first_tab + 2];
+
+#if !defined USE_WARP_SHUFFLE_INSN
+  // local_sums[b][g]: running sum for tab (first_tab + b) that station group g
+  // starts from; slot g = 0 stays zero, slot g + 1 is written by group g.
+  __local float2 local_sums[3][NR_STATIONS / 6][NR_TABS / 3][NR_POLARIZATIONS];
+#endif
+
+  float2 sample_0, sample_1, sample_2, sample_3, sample_4, sample_5;
+  float2 sum_0, sum_1, sum_2;
+
+#if !defined USE_WARP_SHUFFLE_INSN
+  // Only read back by the work-item that writes it, so no barrier is needed.
+  if (first_station == 0) {
+    local_sums[0][0][tab_group][pol] = 0;
+    local_sums[1][0][tab_group][pol] = 0;
+    local_sums[2][0][tab_group][pol] = 0;
+  }
+#endif
+
+  // All work-items execute the same number of iterations (so the barriers
+  // below are reached by everyone); the start is skewed by station_group and
+  // out-of-range time steps are masked with validTime.
+  for (int time = -(int) station_group; time < (int) (NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1 + NR_STATIONS / 6 - 1 - station_group); time ++) {
+    bool validTime = time >= 0 && time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1;
+
+    if (validTime) {
+      sample_0 = convert_float2((*samples)[first_station + 0][subband][time][pol]);
+      sample_1 = convert_float2((*samples)[first_station + 1][subband][time][pol]);
+      sample_2 = convert_float2((*samples)[first_station + 2][subband][time][pol]);
+      sample_3 = convert_float2((*samples)[first_station + 3][subband][time][pol]);
+      sample_4 = convert_float2((*samples)[first_station + 4][subband][time][pol]);
+      sample_5 = convert_float2((*samples)[first_station + 5][subband][time][pol]);
+
+#if !defined USE_WARP_SHUFFLE_INSN
+      // Sums the previous station group produced for this time sample.
+      sum_0 = local_sums[0][station_group][tab_group][pol];
+      sum_1 = local_sums[1][station_group][tab_group][pol];
+      sum_2 = local_sums[2][station_group][tab_group][pol];
+#else
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_0.x) : "0" (sum_0.x));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_0.y) : "0" (sum_0.y));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_0.z) : "0" (sum_0.z));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_0.w) : "0" (sum_0.w));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_1.x) : "0" (sum_1.x));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_1.y) : "0" (sum_1.y));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_1.z) : "0" (sum_1.z));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_1.w) : "0" (sum_1.w));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_2.x) : "0" (sum_2.x));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_2.y) : "0" (sum_2.y));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_2.z) : "0" (sum_2.z));
+      asm("shfl.up.b32 %0, %0, 1, 0x00;" : "=r" (sum_2.w) : "0" (sum_2.w));
+#endif
+
+      // sum += weight * sample as a complex product: the .xx term scales the
+      // sample by the real part, the .yy term scales the sample rotated by
+      // 90 degrees (-im, re) by the imaginary part.
+      sum_0 += weight_0_0.xx * sample_0;
+      sum_1 += weight_0_1.xx * sample_0;
+      sum_2 += weight_0_2.xx * sample_0;
+      float2 sample_0_tr = (float2) (-sample_0.y, sample_0.x);
+      sum_0 += weight_0_0.yy * sample_0_tr;
+      sum_1 += weight_0_1.yy * sample_0_tr;
+      sum_2 += weight_0_2.yy * sample_0_tr;
+
+      sum_0 += weight_1_0.xx * sample_1;
+      sum_1 += weight_1_1.xx * sample_1;
+      sum_2 += weight_1_2.xx * sample_1;
+      float2 sample_1_tr = (float2) (-sample_1.y, sample_1.x);
+      sum_0 += weight_1_0.yy * sample_1_tr;
+      sum_1 += weight_1_1.yy * sample_1_tr;
+      sum_2 += weight_1_2.yy * sample_1_tr;
+
+      sum_0 += weight_2_0.xx * sample_2;
+      sum_1 += weight_2_1.xx * sample_2;
+      sum_2 += weight_2_2.xx * sample_2;
+      float2 sample_2_tr = (float2) (-sample_2.y, sample_2.x);
+      sum_0 += weight_2_0.yy * sample_2_tr;
+      sum_1 += weight_2_1.yy * sample_2_tr;
+      sum_2 += weight_2_2.yy * sample_2_tr;
+
+      sum_0 += weight_3_0.xx * sample_3;
+      sum_1 += weight_3_1.xx * sample_3;
+      sum_2 += weight_3_2.xx * sample_3;
+      float2 sample_3_tr = (float2) (-sample_3.y, sample_3.x);
+      sum_0 += weight_3_0.yy * sample_3_tr;
+      sum_1 += weight_3_1.yy * sample_3_tr;
+      sum_2 += weight_3_2.yy * sample_3_tr;
+
+      sum_0 += weight_4_0.xx * sample_4;
+      sum_1 += weight_4_1.xx * sample_4;
+      sum_2 += weight_4_2.xx * sample_4;
+      float2 sample_4_tr = (float2) (-sample_4.y, sample_4.x);
+      sum_0 += weight_4_0.yy * sample_4_tr;
+      sum_1 += weight_4_1.yy * sample_4_tr;
+      sum_2 += weight_4_2.yy * sample_4_tr;
+
+      sum_0 += weight_5_0.xx * sample_5;
+      sum_1 += weight_5_1.xx * sample_5;
+      sum_2 += weight_5_2.xx * sample_5;
+      float2 sample_5_tr = (float2) (-sample_5.y, sample_5.x);
+      sum_0 += weight_5_0.yy * sample_5_tr;
+      sum_1 += weight_5_1.yy * sample_5_tr;
+      sum_2 += weight_5_2.yy * sample_5_tr;
+#if !defined USE_WARP_SHUFFLE_INSN
+    }
+
+    // Every group must have read its incoming sums before any group
+    // overwrites the slot of its successor.
+    barrier(CLK_LOCAL_MEM_FENCE);
+
+    if (validTime) {
+#endif
+      if (lastGroupOfStations) {
+        (*complexVoltages)[subband][time][first_tab + 0][pol] = sum_0;
+        (*complexVoltages)[subband][time][first_tab + 1][pol] = sum_1;
+        (*complexVoltages)[subband][time][first_tab + 2][pol] = sum_2;
+#if !defined USE_WARP_SHUFFLE_INSN
+      } else {
+        local_sums[0][station_group + 1][tab_group][pol] = sum_0;
+        local_sums[1][station_group + 1][tab_group][pol] = sum_1;
+        local_sums[2][station_group + 1][tab_group][pol] = sum_2;
+#endif
+      }
+    }
+
+#if !defined USE_WARP_SHUFFLE_INSN
+    // Make the handed-over sums visible before the next iteration reads them.
+    barrier(CLK_LOCAL_MEM_FENCE);
+#endif
+  }
+}
diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop-0.ptx b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop-0.ptx new file mode 100644 index
0000000000000000000000000000000000000000..79498463a835875a85828f52fd7f756eb2b37aab Binary files /dev/null and b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.hop-0.ptx differ diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl.not b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.not new file mode 100644 index 0000000000000000000000000000000000000000..a0934a2e0bcc673c2c5cbae69ab54a45cc56a27d --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.not @@ -0,0 +1,451 @@ +#if NR_BITS_PER_SAMPLE == 8 +typedef char2 SampleType; +#else +typedef short2 SampleType; +#endif + +#if NR_STATIONS > 48 +#error unsupported number of stations (max = 48) +#endif + + +typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS]; +typedef __global SampleType (*SamplesType)[NR_STATIONS][NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS]; +typedef __global float (*WeightsType)[NR_STATIONS][NR_SUBBANDS][NR_TABS][2]; + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *samplesPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + SamplesType samples = (SamplesType) samplesPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint ri = get_local_id(0) & 1; + uint pol = get_local_id(0) >> 1; + uint tab = get_local_id(1); + uint subband = get_global_id(2); + + float2 sample; + __local float2 local_samples[NR_STATIONS][8][NR_POLARIZATIONS]; +#if 0 + __local float2 local_tmp[NR_TABS][NR_POLARIZATIONS]; +#endif + +#if NR_STATIONS >= 1 + float weight_00 = (*weights)[ 0][subband][tab][ri]; +#endif +#if NR_STATIONS >= 2 + float weight_01 = (*weights)[ 1][subband][tab][ri]; +#endif +#if NR_STATIONS >= 3 + float weight_02 = (*weights)[ 2][subband][tab][ri]; +#endif +#if NR_STATIONS >= 4 + float weight_03 = (*weights)[ 3][subband][tab][ri]; +#endif +#if NR_STATIONS >= 5 + float weight_04 = (*weights)[ 
4][subband][tab][ri]; +#endif +#if NR_STATIONS >= 6 + float weight_05 = (*weights)[ 5][subband][tab][ri]; +#endif +#if NR_STATIONS >= 7 + float weight_06 = (*weights)[ 6][subband][tab][ri]; +#endif +#if NR_STATIONS >= 8 + float weight_07 = (*weights)[ 7][subband][tab][ri]; +#endif +#if NR_STATIONS >= 9 + float weight_08 = (*weights)[ 8][subband][tab][ri]; +#endif +#if NR_STATIONS >= 10 + float weight_09 = (*weights)[ 9][subband][tab][ri]; +#endif +#if NR_STATIONS >= 11 + float weight_10 = (*weights)[10][subband][tab][ri]; +#endif +#if NR_STATIONS >= 12 + float weight_11 = (*weights)[11][subband][tab][ri]; +#endif +#if NR_STATIONS >= 13 + float weight_12 = (*weights)[12][subband][tab][ri]; +#endif +#if NR_STATIONS >= 14 + float weight_13 = (*weights)[13][subband][tab][ri]; +#endif +#if NR_STATIONS >= 15 + float weight_14 = (*weights)[14][subband][tab][ri]; +#endif +#if NR_STATIONS >= 16 + float weight_15 = (*weights)[15][subband][tab][ri]; +#endif +#if NR_STATIONS >= 17 + float weight_16 = (*weights)[16][subband][tab][ri]; +#endif +#if NR_STATIONS >= 18 + float weight_17 = (*weights)[17][subband][tab][ri]; +#endif +#if NR_STATIONS >= 19 + float weight_18 = (*weights)[18][subband][tab][ri]; +#endif +#if NR_STATIONS >= 20 + float weight_19 = (*weights)[19][subband][tab][ri]; +#endif +#if NR_STATIONS >= 21 + float weight_20 = (*weights)[20][subband][tab][ri]; +#endif +#if NR_STATIONS >= 22 + float weight_21 = (*weights)[21][subband][tab][ri]; +#endif +#if NR_STATIONS >= 23 + float weight_22 = (*weights)[22][subband][tab][ri]; +#endif +#if NR_STATIONS >= 24 + float weight_23 = (*weights)[23][subband][tab][ri]; +#endif +#if NR_STATIONS >= 25 + float weight_24 = (*weights)[24][subband][tab][ri]; +#endif +#if NR_STATIONS >= 26 + float weight_25 = (*weights)[25][subband][tab][ri]; +#endif +#if NR_STATIONS >= 27 + float weight_26 = (*weights)[26][subband][tab][ri]; +#endif +#if NR_STATIONS >= 28 + float weight_27 = (*weights)[27][subband][tab][ri]; +#endif +#if NR_STATIONS 
>= 29 + float weight_28 = (*weights)[28][subband][tab][ri]; +#endif +#if NR_STATIONS >= 30 + float weight_29 = (*weights)[29][subband][tab][ri]; +#endif +#if NR_STATIONS >= 31 + float weight_30 = (*weights)[30][subband][tab][ri]; +#endif +#if NR_STATIONS >= 32 + float weight_31 = (*weights)[31][subband][tab][ri]; +#endif +#if NR_STATIONS >= 33 + float weight_32 = (*weights)[32][subband][tab][ri]; +#endif +#if NR_STATIONS >= 34 + float weight_33 = (*weights)[33][subband][tab][ri]; +#endif +#if NR_STATIONS >= 35 + float weight_34 = (*weights)[34][subband][tab][ri]; +#endif +#if NR_STATIONS >= 36 + float weight_35 = (*weights)[35][subband][tab][ri]; +#endif +#if NR_STATIONS >= 37 + float weight_36 = (*weights)[36][subband][tab][ri]; +#endif +#if NR_STATIONS >= 38 + float weight_37 = (*weights)[37][subband][tab][ri]; +#endif +#if NR_STATIONS >= 39 + float weight_38 = (*weights)[38][subband][tab][ri]; +#endif +#if NR_STATIONS >= 40 + float weight_39 = (*weights)[39][subband][tab][ri]; +#endif +#if NR_STATIONS >= 41 + float weight_40 = (*weights)[40][subband][tab][ri]; +#endif +#if NR_STATIONS >= 42 + float weight_41 = (*weights)[41][subband][tab][ri]; +#endif +#if NR_STATIONS >= 43 + float weight_42 = (*weights)[42][subband][tab][ri]; +#endif +#if NR_STATIONS >= 44 + float weight_43 = (*weights)[43][subband][tab][ri]; +#endif +#if NR_STATIONS >= 45 + float weight_44 = (*weights)[44][subband][tab][ri]; +#endif +#if NR_STATIONS >= 46 + float weight_45 = (*weights)[45][subband][tab][ri]; +#endif +#if NR_STATIONS >= 47 + float weight_46 = (*weights)[46][subband][tab][ri]; +#endif +#if NR_STATIONS >= 48 + float weight_47 = (*weights)[47][subband][tab][ri]; +#endif + + for (uint time = 0; time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1; time += 8) { + for (uint i = get_local_id(0) + NR_POLARIZATIONS * 2 * get_local_id(1); i < NR_STATIONS * 8 * NR_POLARIZATIONS; i += NR_TABS * NR_POLARIZATIONS * 2) { + uint p = i % NR_POLARIZATIONS; + uint t = (i / NR_POLARIZATIONS) % 
8; + uint s = i / NR_POLARIZATIONS / 8; + + if (time + t < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1) + local_samples[0][0][i] = convert_float2((*samples)[s][subband][time + t][p]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (uint t = 0; t < min(8U, (NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1 - time)); t ++) { + float2 sum = 0; + +#if NR_STATIONS >= 1 + sample = local_samples[ 0][t][pol]; + sum += weight_00 * sample; +#endif + +#if NR_STATIONS >= 2 + sample = local_samples[ 1][t][pol]; + sum += weight_01 * sample; +#endif + +#if NR_STATIONS >= 3 + sample = local_samples[ 2][t][pol]; + sum += weight_02 * sample; +#endif + +#if NR_STATIONS >= 4 + sample = local_samples[ 3][t][pol]; + sum += weight_03 * sample; +#endif + +#if NR_STATIONS >= 5 + sample = local_samples[ 4][t][pol]; + sum += weight_04 * sample; +#endif + +#if NR_STATIONS >= 6 + sample = local_samples[ 5][t][pol]; + sum += weight_05 * sample; +#endif + +#if NR_STATIONS >= 7 + sample = local_samples[ 6][t][pol]; + sum += weight_06 * sample; +#endif + +#if NR_STATIONS >= 8 + sample = local_samples[ 7][t][pol]; + sum += weight_07 * sample; +#endif + +#if NR_STATIONS >= 9 + sample = local_samples[ 8][t][pol]; + sum += weight_08 * sample; +#endif + +#if NR_STATIONS >= 10 + sample = local_samples[ 9][t][pol]; + sum += weight_09 * sample; +#endif + +#if NR_STATIONS >= 11 + sample = local_samples[10][t][pol]; + sum += weight_10 * sample; +#endif + +#if NR_STATIONS >= 12 + sample = local_samples[11][t][pol]; + sum += weight_11 * sample; +#endif + +#if NR_STATIONS >= 13 + sample = local_samples[12][t][pol]; + sum += weight_12 * sample; +#endif + +#if NR_STATIONS >= 14 + sample = local_samples[13][t][pol]; + sum += weight_13 * sample; +#endif + +#if NR_STATIONS >= 15 + sample = local_samples[14][t][pol]; + sum += weight_14 * sample; +#endif + +#if NR_STATIONS >= 16 + sample = local_samples[15][t][pol]; + sum += weight_15 * sample; +#endif + +#if NR_STATIONS >= 17 + sample = local_samples[16][t][pol]; + 
sum += weight_16 * sample; +#endif + +#if NR_STATIONS >= 18 + sample = local_samples[17][t][pol]; + sum += weight_17 * sample; +#endif + +#if NR_STATIONS >= 19 + sample = local_samples[18][t][pol]; + sum += weight_18 * sample; +#endif + +#if NR_STATIONS >= 20 + sample = local_samples[19][t][pol]; + sum += weight_19 * sample; +#endif + +#if NR_STATIONS >= 21 + sample = local_samples[20][t][pol]; + sum += weight_20 * sample; +#endif + +#if NR_STATIONS >= 22 + sample = local_samples[21][t][pol]; + sum += weight_21 * sample; +#endif + +#if NR_STATIONS >= 23 + sample = local_samples[22][t][pol]; + sum += weight_22 * sample; +#endif + +#if NR_STATIONS >= 24 + sample = local_samples[23][t][pol]; + sum += weight_23 * sample; +#endif + +#if NR_STATIONS >= 25 + sample = local_samples[24][t][pol]; + sum += weight_24 * sample; +#endif + +#if NR_STATIONS >= 26 + sample = local_samples[25][t][pol]; + sum += weight_25 * sample; +#endif + +#if NR_STATIONS >= 27 + sample = local_samples[26][t][pol]; + sum += weight_26 * sample; +#endif + +#if NR_STATIONS >= 28 + sample = local_samples[27][t][pol]; + sum += weight_27 * sample; +#endif + +#if NR_STATIONS >= 29 + sample = local_samples[28][t][pol]; + sum += weight_28 * sample; +#endif + +#if NR_STATIONS >= 30 + sample = local_samples[29][t][pol]; + sum += weight_29 * sample; +#endif + +#if NR_STATIONS >= 31 + sample = local_samples[30][t][pol]; + sum += weight_30 * sample; +#endif + +#if NR_STATIONS >= 32 + sample = local_samples[31][t][pol]; + sum += weight_31 * sample; +#endif + +#if NR_STATIONS >= 33 + sample = local_samples[32][t][pol]; + sum += weight_32 * sample; +#endif + +#if NR_STATIONS >= 34 + sample = local_samples[33][t][pol]; + sum += weight_33 * sample; +#endif + +#if NR_STATIONS >= 35 + sample = local_samples[34][t][pol]; + sum += weight_34 * sample; +#endif + +#if NR_STATIONS >= 36 + sample = local_samples[35][t][pol]; + sum += weight_35 * sample; +#endif + +#if NR_STATIONS >= 37 + sample = local_samples[36][t][pol]; + 
sum += weight_36 * sample; +#endif + +#if NR_STATIONS >= 38 + sample = local_samples[37][t][pol]; + sum += weight_37 * sample; +#endif + +#if NR_STATIONS >= 39 + sample = local_samples[38][t][pol]; + sum += weight_38 * sample; +#endif + +#if NR_STATIONS >= 40 + sample = local_samples[39][t][pol]; + sum += weight_39 * sample; +#endif + +#if NR_STATIONS >= 41 + sample = local_samples[40][t][pol]; + sum += weight_40 * sample; +#endif + +#if NR_STATIONS >= 42 + sample = local_samples[41][t][pol]; + sum += weight_41 * sample; +#endif + +#if NR_STATIONS >= 43 + sample = local_samples[42][t][pol]; + sum += weight_42 * sample; +#endif + +#if NR_STATIONS >= 44 + sample = local_samples[43][t][pol]; + sum += weight_43 * sample; +#endif + +#if NR_STATIONS >= 45 + sample = local_samples[44][t][pol]; + sum += weight_44 * sample; +#endif + +#if NR_STATIONS >= 46 + sample = local_samples[45][t][pol]; + sum += weight_45 * sample; +#endif + +#if NR_STATIONS >= 47 + sum += weight_46 * local_samples[46][t][pol]; +#endif + +#if NR_STATIONS >= 48 + sum += weight_47 * local_samples[47][t][pol]; +#endif + +#if 0 + if (ri != 0) + local_tmp[tab][pol] = (float2) (-sum.y, sum.x); + + if (ri == 0) + (*complexVoltages)[subband][time + t][tab][pol] = sum + local_tmp[tab][pol]; +#else + float2 other_sum; + asm("shfl.down.b32 %0, %1, 1, 0x1F;" : "=r" (other_sum.x) : "r" (sum.x)); + asm("shfl.down.b32 %0, %1, 1, 0x1F;" : "=r" (other_sum.y) : "r" (sum.y)); + if (ri == 0) + (*complexVoltages)[subband][time + t][tab][pol] = sum + (float2) (-other_sum.y, other_sum.x); +#endif + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} diff --git a/RTCP/GPUProc/src/UHEP/BeamFormer.cl.ok b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.ok new file mode 100644 index 0000000000000000000000000000000000000000..3858ce5772104a39d61a32a4372e308563a37b1f --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/BeamFormer.cl.ok @@ -0,0 +1,509 @@ +#if NR_BITS_PER_SAMPLE == 8 +typedef char2 SampleType; +#else +typedef short2 SampleType; +#endif + +#if 
NR_STATIONS > 48 +#error unsupported number of stations (max = 48) +#endif + + +typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS]; +typedef __global SampleType (*SamplesType)[NR_STATIONS][NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS]; +typedef __global float2 (*WeightsType)[NR_STATIONS][NR_SUBBANDS][NR_TABS]; + + +__kernel void complexVoltages(__global void *complexVoltagesPtr, + __global const void *samplesPtr, + __global const void *weightsPtr) +{ + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + SamplesType samples = (SamplesType) samplesPtr; + WeightsType weights = (WeightsType) weightsPtr; + + uint pol = get_local_id(0); + uint tab = get_local_id(1); + uint subband = get_global_id(2); + + float2 sample; + __local float2 local_samples[NR_STATIONS >= 24 ? 24 : NR_STATIONS][16][NR_POLARIZATIONS]; + +#if NR_STATIONS >= 1 + float2 weight_00 = (*weights)[ 0][subband][tab]; +#endif +#if NR_STATIONS >= 2 + float2 weight_01 = (*weights)[ 1][subband][tab]; +#endif +#if NR_STATIONS >= 3 + float2 weight_02 = (*weights)[ 2][subband][tab]; +#endif +#if NR_STATIONS >= 4 + float2 weight_03 = (*weights)[ 3][subband][tab]; +#endif +#if NR_STATIONS >= 5 + float2 weight_04 = (*weights)[ 4][subband][tab]; +#endif +#if NR_STATIONS >= 6 + float2 weight_05 = (*weights)[ 5][subband][tab]; +#endif +#if NR_STATIONS >= 7 + float2 weight_06 = (*weights)[ 6][subband][tab]; +#endif +#if NR_STATIONS >= 8 + float2 weight_07 = (*weights)[ 7][subband][tab]; +#endif +#if NR_STATIONS >= 9 + float2 weight_08 = (*weights)[ 8][subband][tab]; +#endif +#if NR_STATIONS >= 10 + float2 weight_09 = (*weights)[ 9][subband][tab]; +#endif +#if NR_STATIONS >= 11 + float2 weight_10 = (*weights)[10][subband][tab]; +#endif +#if NR_STATIONS >= 12 + float2 weight_11 = (*weights)[11][subband][tab]; +#endif +#if NR_STATIONS >= 13 + float2 weight_12 = 
(*weights)[12][subband][tab]; +#endif +#if NR_STATIONS >= 14 + float2 weight_13 = (*weights)[13][subband][tab]; +#endif +#if NR_STATIONS >= 15 + float2 weight_14 = (*weights)[14][subband][tab]; +#endif +#if NR_STATIONS >= 16 + float2 weight_15 = (*weights)[15][subband][tab]; +#endif +#if NR_STATIONS >= 17 + float2 weight_16 = (*weights)[16][subband][tab]; +#endif +#if NR_STATIONS >= 18 + float2 weight_17 = (*weights)[17][subband][tab]; +#endif +#if NR_STATIONS >= 19 + float2 weight_18 = (*weights)[18][subband][tab]; +#endif +#if NR_STATIONS >= 20 + float2 weight_19 = (*weights)[19][subband][tab]; +#endif +#if NR_STATIONS >= 21 + float2 weight_20 = (*weights)[20][subband][tab]; +#endif +#if NR_STATIONS >= 22 + float2 weight_21 = (*weights)[21][subband][tab]; +#endif +#if NR_STATIONS >= 23 + float2 weight_22 = (*weights)[22][subband][tab]; +#endif +#if NR_STATIONS >= 24 + float2 weight_23 = (*weights)[23][subband][tab]; +#endif + + for (uint time = 0; time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1; time += 16) { + for (uint i = get_local_id(0) + NR_POLARIZATIONS * get_local_id(1); i < (NR_STATIONS >= 24 ? 
24 : NR_STATIONS) * 16 * NR_POLARIZATIONS; i += NR_TABS * NR_POLARIZATIONS) { + uint p = i % NR_POLARIZATIONS; + uint t = (i / NR_POLARIZATIONS) % 16; + uint s = i / NR_POLARIZATIONS / 16; + + if (time + t < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1) + local_samples[0][0][i] = convert_float2((*samples)[s][subband][time + t][p]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (uint t = 0; t < min(16U, (NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1 - time)); t ++) { + float2 sum = 0; + +#if NR_STATIONS >= 1 + sample = local_samples[ 0][t][pol]; + sum += weight_00.xx * sample; + sum += weight_00.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 2 + sample = local_samples[ 1][t][pol]; + sum += weight_01.xx * sample; + sum += weight_01.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 3 + sample = local_samples[ 2][t][pol]; + sum += weight_02.xx * sample; + sum += weight_02.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 4 + sample = local_samples[ 3][t][pol]; + sum += weight_03.xx * sample; + sum += weight_03.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 5 + sample = local_samples[ 4][t][pol]; + sum += weight_04.xx * sample; + sum += weight_04.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 6 + sample = local_samples[ 5][t][pol]; + sum += weight_05.xx * sample; + sum += weight_05.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 7 + sample = local_samples[ 6][t][pol]; + sum += weight_06.xx * sample; + sum += weight_06.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 8 + sample = local_samples[ 7][t][pol]; + sum += weight_07.xx * sample; + sum += weight_07.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 9 + sample = local_samples[ 8][t][pol]; + sum += weight_08.xx * sample; + sum += weight_08.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 10 + sample = local_samples[ 9][t][pol]; + sum += 
weight_09.xx * sample; + sum += weight_09.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 11 + sample = local_samples[10][t][pol]; + sum += weight_10.xx * sample; + sum += weight_10.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 12 + sample = local_samples[11][t][pol]; + sum += weight_11.xx * sample; + sum += weight_11.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 13 + sample = local_samples[12][t][pol]; + sum += weight_12.xx * sample; + sum += weight_12.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 14 + sample = local_samples[13][t][pol]; + sum += weight_13.xx * sample; + sum += weight_13.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 15 + sample = local_samples[14][t][pol]; + sum += weight_14.xx * sample; + sum += weight_14.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 16 + sample = local_samples[15][t][pol]; + sum += weight_15.xx * sample; + sum += weight_15.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 17 + sample = local_samples[16][t][pol]; + sum += weight_16.xx * sample; + sum += weight_16.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 18 + sample = local_samples[17][t][pol]; + sum += weight_17.xx * sample; + sum += weight_17.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 19 + sample = local_samples[18][t][pol]; + sum += weight_18.xx * sample; + sum += weight_18.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 20 + sample = local_samples[19][t][pol]; + sum += weight_19.xx * sample; + sum += weight_19.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 21 + sample = local_samples[20][t][pol]; + sum += weight_20.xx * sample; + sum += weight_20.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 22 + sample = local_samples[21][t][pol]; + sum += weight_21.xx * sample; + sum += weight_21.yy * (float2) (-sample.y, sample.x); +#endif + 
+#if NR_STATIONS >= 23 + sample = local_samples[22][t][pol]; + sum += weight_22.xx * sample; + sum += weight_22.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 24 + sample = local_samples[23][t][pol]; + sum += weight_23.xx * sample; + sum += weight_23.yy * (float2) (-sample.y, sample.x); +#endif + + (*complexVoltages)[subband][time + t][tab][pol] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + +#if NR_STATIONS > 24 +#if NR_STATIONS >= 25 + float2 weight_24 = (*weights)[24][subband][tab]; +#endif +#if NR_STATIONS >= 26 + float2 weight_25 = (*weights)[25][subband][tab]; +#endif +#if NR_STATIONS >= 27 + float2 weight_26 = (*weights)[26][subband][tab]; +#endif +#if NR_STATIONS >= 28 + float2 weight_27 = (*weights)[27][subband][tab]; +#endif +#if NR_STATIONS >= 29 + float2 weight_28 = (*weights)[28][subband][tab]; +#endif +#if NR_STATIONS >= 30 + float2 weight_29 = (*weights)[29][subband][tab]; +#endif +#if NR_STATIONS >= 31 + float2 weight_30 = (*weights)[30][subband][tab]; +#endif +#if NR_STATIONS >= 32 + float2 weight_31 = (*weights)[31][subband][tab]; +#endif +#if NR_STATIONS >= 33 + float2 weight_32 = (*weights)[32][subband][tab]; +#endif +#if NR_STATIONS >= 34 + float2 weight_33 = (*weights)[33][subband][tab]; +#endif +#if NR_STATIONS >= 35 + float2 weight_34 = (*weights)[34][subband][tab]; +#endif +#if NR_STATIONS >= 36 + float2 weight_35 = (*weights)[35][subband][tab]; +#endif +#if NR_STATIONS >= 37 + float2 weight_36 = (*weights)[36][subband][tab]; +#endif +#if NR_STATIONS >= 38 + float2 weight_37 = (*weights)[37][subband][tab]; +#endif +#if NR_STATIONS >= 39 + float2 weight_38 = (*weights)[38][subband][tab]; +#endif +#if NR_STATIONS >= 40 + float2 weight_39 = (*weights)[39][subband][tab]; +#endif +#if NR_STATIONS >= 41 + float2 weight_40 = (*weights)[40][subband][tab]; +#endif +#if NR_STATIONS >= 42 + float2 weight_41 = (*weights)[41][subband][tab]; +#endif +#if NR_STATIONS >= 43 + float2 weight_42 = (*weights)[42][subband][tab]; +#endif 
+#if NR_STATIONS >= 44 + float2 weight_43 = (*weights)[43][subband][tab]; +#endif +#if NR_STATIONS >= 45 + float2 weight_44 = (*weights)[44][subband][tab]; +#endif +#if NR_STATIONS >= 46 + float2 weight_45 = (*weights)[45][subband][tab]; +#endif +#if NR_STATIONS >= 47 + float2 weight_46 = (*weights)[46][subband][tab]; +#endif +#if NR_STATIONS >= 48 + float2 weight_47 = (*weights)[47][subband][tab]; +#endif + + for (uint time = 0; time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1; time += 16) { + for (uint i = get_local_id(0) + NR_POLARIZATIONS * get_local_id(1); i < (NR_STATIONS - 24) * 16 * NR_POLARIZATIONS; i += NR_TABS * NR_POLARIZATIONS) { + uint p = i % NR_POLARIZATIONS; + uint t = (i / NR_POLARIZATIONS) % 16; + uint s = i / NR_POLARIZATIONS / 16; + + if (time + t < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1) + local_samples[0][0][i] = convert_float2((*samples)[24 + s][subband][time + t][p]); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + for (uint t = 0; t < min(16U, (NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1 - time)); t ++) { + float2 sum = (*complexVoltages)[subband][time + t][tab][pol]; + +#if NR_STATIONS >= 25 + sample = local_samples[ 0][t][pol]; + sum += weight_24.xx * sample; + sum += weight_24.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 26 + sample = local_samples[ 1][t][pol]; + sum += weight_25.xx * sample; + sum += weight_25.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 27 + sample = local_samples[ 2][t][pol]; + sum += weight_26.xx * sample; + sum += weight_26.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 28 + sample = local_samples[ 3][t][pol]; + sum += weight_27.xx * sample; + sum += weight_27.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 29 + sample = local_samples[ 4][t][pol]; + sum += weight_28.xx * sample; + sum += weight_28.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 30 + sample = local_samples[ 5][t][pol]; + sum += 
weight_29.xx * sample; + sum += weight_29.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 31 + sample = local_samples[ 6][t][pol]; + sum += weight_30.xx * sample; + sum += weight_30.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 32 + sample = local_samples[ 7][t][pol]; + sum += weight_31.xx * sample; + sum += weight_31.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 33 + sample = local_samples[ 8][t][pol]; + sum += weight_32.xx * sample; + sum += weight_32.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 34 + sample = local_samples[ 9][t][pol]; + sum += weight_33.xx * sample; + sum += weight_33.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 35 + sample = local_samples[10][t][pol]; + sum += weight_34.xx * sample; + sum += weight_34.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 36 + sample = local_samples[11][t][pol]; + sum += weight_35.xx * sample; + sum += weight_35.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 37 + sample = local_samples[12][t][pol]; + sum += weight_36.xx * sample; + sum += weight_36.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 38 + sample = local_samples[13][t][pol]; + sum += weight_37.xx * sample; + sum += weight_37.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 39 + sample = local_samples[14][t][pol]; + sum += weight_38.xx * sample; + sum += weight_38.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 40 + sample = local_samples[15][t][pol]; + sum += weight_39.xx * sample; + sum += weight_39.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 41 + sample = local_samples[16][t][pol]; + sum += weight_40.xx * sample; + sum += weight_40.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 42 + sample = local_samples[17][t][pol]; + sum += weight_41.xx * sample; + sum += weight_41.yy * (float2) (-sample.y, sample.x); +#endif + 
+#if NR_STATIONS >= 43 + sample = local_samples[18][t][pol]; + sum += weight_42.xx * sample; + sum += weight_42.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 44 + sample = local_samples[19][t][pol]; + sum += weight_43.xx * sample; + sum += weight_43.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 45 + sample = local_samples[20][t][pol]; + sum += weight_44.xx * sample; + sum += weight_44.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 46 + sample = local_samples[21][t][pol]; + sum += weight_45.xx * sample; + sum += weight_45.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 47 + sample = local_samples[22][t][pol]; + sum += weight_46.xx * sample; + sum += weight_46.yy * (float2) (-sample.y, sample.x); +#endif + +#if NR_STATIONS >= 48 + sample = local_samples[23][t][pol]; + sum += weight_47.xx * sample; + sum += weight_47.yy * (float2) (-sample.y, sample.x); +#endif + + (*complexVoltages)[subband][time + t][tab][pol] = sum; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +#endif +} diff --git a/RTCP/GPUProc/src/UHEP/InvFFT.cl b/RTCP/GPUProc/src/UHEP/InvFFT.cl new file mode 100644 index 0000000000000000000000000000000000000000..350953c55b3df035477a3873079ad707410c74f6 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvFFT.cl @@ -0,0 +1,287 @@ +#include "math.cl" + + +typedef __global float (*InvFFTedDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1][1024]; +typedef __global float2 (*TransposedDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1][512]; + + +float2 inv(float2 a) +{ + return (float2) (-a.y, a.x); +} + + +void inv4(float2 *R0, float2 *R1, float2 *R2, float2 *R3) +{ + float2 T0, T1, T2, T3; + + T1 = (*R1); (*R1) = (*R2); (*R2) = T1; + + T0 = (*R0), T1 = (*R1), (*R0) = T0 + T1, (*R1) = T0 - T1; + T2 = (*R2), T3 = (*R3), (*R2) = T2 + T3, (*R3) = T2 - T3; + + T0 = (*R0), T2 = (*R2), (*R0) = T0 + T2, (*R2) = T0 - T2; + T1 = 
(*R1), T3 = inv(*R3), (*R1) = T1 + T3, (*R3) = T1 - T3; +} + + +void inv8(float2 *R0, float2 *R1, float2 *R2, float2 *R3, float2 *R4, float2 *R5, float2 *R6, float2 *R7) +{ + const float HSQR2 = .70710678118654752440f; + float2 T0, T1, T2, T3, T4, T5, T6, T7; + + T1 = (*R1); (*R1) = (*R4); (*R4) = T1; + T3 = (*R3); (*R3) = (*R6); (*R6) = T3; + + T0 = (*R0), T1 = (*R1) , (*R0) = T0 + T1, (*R1) = T0 - T1; + T2 = (*R2), T3 = (*R3) , (*R2) = T2 + T3, (*R3) = T2 - T3; + T4 = (*R4), T5 = (*R5) , (*R4) = T4 + T5, (*R5) = T4 - T5; + T6 = (*R6), T7 = (*R7) , (*R6) = T6 + T7, (*R7) = T6 - T7; + + T0 = (*R0), T2 = (*R2) , (*R0) = T0 + T2, (*R2) = T0 - T2; + T1 = (*R1), T3 = inv(*R3) , (*R1) = T1 + T3, (*R3) = T1 - T3; + T4 = (*R4), T6 = (*R6) , (*R4) = T4 + T6, (*R6) = T4 - T6; + T5 = (*R5), T7 = inv(*R7) , (*R5) = T5 + T7, (*R7) = T5 - T7; + + T0 = (*R0), T4 = (*R4) , (*R0) = T0 + T4, (*R4) = T0 - T4; + T1 = (*R1), T5 = HSQR2 * (inv(*R5) + (*R5)), (*R1) = T1 + T5, (*R5) = T1 - T5; + T2 = (*R2), T6 = inv(*R6) , (*R2) = T2 + T6, (*R6) = T2 - T6; + T3 = (*R3), T7 = HSQR2 * (inv(*R7) - (*R7)), (*R3) = T3 + T7, (*R7) = T3 - T7; +} + + +__kernel __attribute__((reqd_work_group_size(128, 1, 1))) +void inv_fft(__global float2 *outputPtr, __global const float *inputPtr) +{ + InvFFTedDataType invFFTedData = (InvFFTedDataType) outputPtr; + TransposedDataType transposedData = (TransposedDataType) inputPtr; + + const float PI = 3.14159265358979323844f; + + __local union { + float f1[1024]; + float2 f2[512]; + float4 f4[256]; + float8 f8[128]; + } lds; + + uint windex; + float ang; + float2 R0, R1, R2, R3, R4, R5, R6, R7; + float2 W0, W1, W2, W3; + +#if 0 + __global float2 *bufIn = inputPtr + get_global_id(1) * 512; + __global float *bufOut = outputPtr + get_global_id(1) * 1024; + + R0 = bufIn[get_local_id(0) + 0]; + R1 = bufIn[get_local_id(0) + 128]; + R2 = bufIn[get_local_id(0) + 256]; + R3 = bufIn[get_local_id(0) + 384]; +#else + R0 = 
(*transposedData)[0][0][get_global_id(1)][get_local_id(0) + 0]; + R1 = (*transposedData)[0][0][get_global_id(1)][get_local_id(0) + 128]; + R2 = (*transposedData)[0][0][get_global_id(1)][get_local_id(0) + 256]; + R3 = (*transposedData)[0][0][get_global_id(1)][get_local_id(0) + 384]; +#endif + + lds.f2[get_local_id(0) + 0] = R0; + lds.f2[get_local_id(0) + 128] = R1; + lds.f2[get_local_id(0) + 256] = R2; + lds.f2[get_local_id(0) + 384] = R3; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) == 0) { + R4 = (float2) (R0.y, 0); + R0 = (float2) (R0.x, 0); + //R4 = (float2) (bufIn[512].x, 0); + } else { + R4 = lds.f2[512 - get_local_id(0)]; + } + + R5 = lds.f2[384 - get_local_id(0)]; + R6 = lds.f2[256 - get_local_id(0)]; + R7 = lds.f2[128 - get_local_id(0)]; + R4.y = -R4.y; + R5.y = -R5.y; + R6.y = -R6.y; + R7.y = -R7.y; + + inv8(&R0, &R1, &R2, &R3, &R4, &R5, &R6, &R7); + + barrier(CLK_LOCAL_MEM_FENCE); + + lds.f8[get_local_id(0)] = (float8) (R0.x, R1.x, R2.x, R3.x, R4.x, R5.x, R6.x, R7.x); + + barrier(CLK_LOCAL_MEM_FENCE); + + R0.x = lds.f1[get_local_id(0) + 0]; + R1.x = lds.f1[get_local_id(0) + 128]; + R2.x = lds.f1[get_local_id(0) + 256]; + R3.x = lds.f1[get_local_id(0) + 384]; + R4.x = lds.f1[get_local_id(0) + 512]; + R5.x = lds.f1[get_local_id(0) + 640]; + R6.x = lds.f1[get_local_id(0) + 768]; + R7.x = lds.f1[get_local_id(0) + 896]; + + barrier(CLK_LOCAL_MEM_FENCE); + + lds.f8[get_local_id(0)] = (float8) (R0.y, R1.y, R2.y, R3.y, R4.y, R5.y, R6.y, R7.y); + + barrier(CLK_LOCAL_MEM_FENCE); + + R0.y = lds.f1[get_local_id(0) + 0]; + R1.y = lds.f1[get_local_id(0) + 128]; + R2.y = lds.f1[get_local_id(0) + 256]; + R3.y = lds.f1[get_local_id(0) + 384]; + R4.y = lds.f1[get_local_id(0) + 512]; + R5.y = lds.f1[get_local_id(0) + 640]; + R6.y = lds.f1[get_local_id(0) + 768]; + R7.y = lds.f1[get_local_id(0) + 896]; + + barrier(CLK_LOCAL_MEM_FENCE); + + ang = 2.0f * PI / 64.0f * (float) (get_local_id(0) % 8); + R1 = cmul(cexp( ang), R1); + R2 = cmul(cexp(2.0f * ang), R2); + 
R3 = cmul(cexp(3.0f * ang), R3); + R4 = cmul(cexp(4.0f * ang), R4); + R5 = cmul(cexp(5.0f * ang), R5); + R6 = cmul(cexp(6.0f * ang), R6); + R7 = cmul(cexp(7.0f * ang), R7); + + inv8(&R0, &R1, &R2, &R3, &R4, &R5, &R6, &R7); + + windex = 64 * (get_local_id(0) / 8) + get_local_id(0) % 8; + lds.f1[windex + 0] = R0.x; + lds.f1[windex + 8] = R1.x; + lds.f1[windex + 16] = R2.x; + lds.f1[windex + 24] = R3.x; + lds.f1[windex + 32] = R4.x; + lds.f1[windex + 40] = R5.x; + lds.f1[windex + 48] = R6.x; + lds.f1[windex + 56] = R7.x; + + barrier(CLK_LOCAL_MEM_FENCE); + + W0 = lds.f2[get_local_id(0) + 0]; + W1 = lds.f2[get_local_id(0) + 128]; + W2 = lds.f2[get_local_id(0) + 256]; + W3 = lds.f2[get_local_id(0) + 384]; + R0.x = W0.x; + R1.x = W1.x; + R2.x = W2.x; + R3.x = W3.x; + R4.x = W0.y; + R5.x = W1.y; + R6.x = W2.y; + R7.x = W3.y; + + barrier(CLK_LOCAL_MEM_FENCE); + + lds.f1[windex + 0] = R0.y; + lds.f1[windex + 8] = R1.y; + lds.f1[windex + 16] = R2.y; + lds.f1[windex + 24] = R3.y; + lds.f1[windex + 32] = R4.y; + lds.f1[windex + 40] = R5.y; + lds.f1[windex + 48] = R6.y; + lds.f1[windex + 56] = R7.y; + + barrier(CLK_LOCAL_MEM_FENCE); + + W0 = lds.f2[get_local_id(0) + 0]; + W1 = lds.f2[get_local_id(0) + 128]; + W2 = lds.f2[get_local_id(0) + 256]; + W3 = lds.f2[get_local_id(0) + 384]; + R0.y = W0.x; + R1.y = W1.x; + R2.y = W2.x; + R3.y = W3.x; + R4.y = W0.y; + R5.y = W1.y; + R6.y = W2.y; + R7.y = W3.y; + + barrier(CLK_LOCAL_MEM_FENCE); + + ang = 2.0f * PI / 256.0f * (2 * (get_local_id(0) % 32)); + R1 = cmul(cexp( ang), R1); + R2 = cmul(cexp(2.0f * ang), R2); + R3 = cmul(cexp(3.0f * ang), R3); + ang += 2.0f * PI / 256.0f; + R5 = cmul(cexp( ang), R5); + R6 = cmul(cexp(2.0f * ang), R6); + R7 = cmul(cexp(3.0f * ang), R7); + + inv4(&R0, &R1, &R2, &R3); + inv4(&R4, &R5, &R6, &R7); + + windex = 128 * (get_local_id(0) / 32) + get_local_id(0) % 32; + lds.f2[windex + 0] = (float2) (R0.x, R4.x); + lds.f2[windex + 32] = (float2) (R1.x, R5.x); + lds.f2[windex + 64] = (float2) (R2.x, R6.x); + 
lds.f2[windex + 96] = (float2) (R3.x, R7.x); + + barrier(CLK_LOCAL_MEM_FENCE); + + W0 = lds.f2[get_local_id(0) + 0]; + W1 = lds.f2[get_local_id(0) + 128]; + W2 = lds.f2[get_local_id(0) + 256]; + W3 = lds.f2[get_local_id(0) + 384]; + R0.x = W0.x; + R1.x = W1.x; + R2.x = W2.x; + R3.x = W3.x; + R4.x = W0.y; + R5.x = W1.y; + R6.x = W2.y; + R7.x = W3.y; + + lds.f2[get_local_id(0) + 0] = (float2) (R0.y, R4.y); + lds.f2[get_local_id(0) + 128] = (float2) (R1.y, R5.y); + lds.f2[get_local_id(0) + 256] = (float2) (R2.y, R6.y); + lds.f2[get_local_id(0) + 384] = (float2) (R3.y, R7.y); + + barrier(CLK_LOCAL_MEM_FENCE); + + W0 = lds.f2[windex + 0]; + W1 = lds.f2[windex + 32]; + W2 = lds.f2[windex + 64]; + W3 = lds.f2[windex + 96]; + R0.y = W0.x; + R1.y = W1.x; + R2.y = W2.x; + R3.y = W3.x; + R4.y = W0.y; + R5.y = W1.y; + R6.y = W2.y; + R7.y = W3.y; + + ang = 2.0f * PI / 1024.0f * (2 * get_local_id(0)); + R1 = cmul(cexp( ang), R1); + R2 = cmul(cexp(2.0f * ang), R2); + R3 = cmul(cexp(3.0f * ang), R3); + ang += 2.0f * PI / 1024.0f; + R5 = cmul(cexp( ang), R5); + R6 = cmul(cexp(2.0f * ang), R6); + R7 = cmul(cexp(3.0f * ang), R7); + + inv4(&R0, &R1, &R2, &R3); + inv4(&R4, &R5, &R6, &R7); + +#if 0 + __global float2 *out = (__global float2 *) bufOut; + out[get_local_id(0) + 0] = 9.765625e-04f * (float2) (R0.x, R4.x); + out[get_local_id(0) + 128] = 9.765625e-04f * (float2) (R1.x, R5.x); + out[get_local_id(0) + 256] = 9.765625e-04f * (float2) (R2.x, R6.x); + out[get_local_id(0) + 384] = 9.765625e-04f * (float2) (R3.x, R7.x); +#else + __global float2 *out = (__global float2 *) &(*invFFTedData)[0][0][get_global_id(1)][0] + get_local_id(0); + //out[ 0] = 9.765625e-04f * (float2) (R0.x, R4.x); + //out[128] = 9.765625e-04f * (float2) (R1.x, R5.x); + //out[256] = 9.765625e-04f * (float2) (R2.x, R6.x); + //out[384] = 9.765625e-04f * (float2) (R3.x, R7.x); +#endif +} diff --git a/RTCP/GPUProc/src/UHEP/InvFFT.cl-0.ptx b/RTCP/GPUProc/src/UHEP/InvFFT.cl-0.ptx new file mode 100644 index 
0000000000000000000000000000000000000000..86d5c2b091806e681f5dfe3df13f2ce5d1d68535 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvFFT.cl-0.ptx @@ -0,0 +1,759 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Tue Feb 7 07:15:59 2012 (1328595359) +// Driver 295.20 +// + +.version 3.0 +.target sm_21, texmode_independent +.address_size 32 + +.extern .shared .align 32 .b8 shr_3_lds[4096]; + +.entry inv_fft( + .param .u32 .ptr .global .align 8 inv_fft_param_0, + .param .u32 .ptr .global .align 4 inv_fft_param_1 +) +.reqntid 128, 1, 1 +{ + .reg .f32 %f<576>; + .reg .pred %p<2>; + .reg .s32 %r<212>; + + + ld.param.u32 %r26, [inv_fft_param_1]; + // inline asm + mov.u32 %r2, %tid.x; + // inline asm + // inline asm + mov.u32 %r3, %envreg4; + // inline asm + // inline asm + mov.u32 %r4, %ntid.y; + // inline asm + // inline asm + mov.u32 %r5, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r6, %tid.y; + // inline asm + add.s32 %r27, %r6, %r3; + mad.lo.s32 %r28, %r5, %r4, %r27; + shl.b32 %r29, %r28, 12; + add.s32 %r30, %r26, %r29; + shl.b32 %r31, %r2, 3; + add.s32 %r32, %r30, %r31; + ld.global.v2.f32 {%f574, %f575}, [%r32]; + // inline asm + mov.u32 %r7, %tid.x; + // inline asm + // inline asm + mov.u32 %r8, %envreg4; + // inline asm + // inline asm + mov.u32 %r9, %ntid.y; + // inline asm + // inline asm + mov.u32 %r10, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r11, %tid.y; + // inline asm + add.s32 %r33, %r11, %r8; + mad.lo.s32 %r34, %r10, %r9, %r33; + shl.b32 %r35, %r34, 12; + add.s32 %r36, %r26, %r35; + shl.b32 %r37, %r7, 3; + add.s32 %r38, %r37, %r36; + ld.global.v2.f32 {%f516, %f517}, [%r38+1024]; + // inline asm + mov.u32 %r12, %tid.x; + // inline asm + // inline asm + mov.u32 %r13, %envreg4; + // inline asm + // inline asm + mov.u32 %r14, %ntid.y; + // inline asm + // inline asm + mov.u32 %r15, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r16, %tid.y; + // inline asm + add.s32 %r39, %r16, %r13; + mad.lo.s32 %r40, %r15, %r14, %r39; + 
shl.b32 %r41, %r40, 12; + add.s32 %r42, %r26, %r41; + shl.b32 %r43, %r12, 3; + add.s32 %r44, %r43, %r42; + ld.global.v2.f32 {%f510, %f511}, [%r44+2048]; + // inline asm + mov.u32 %r17, %tid.x; + // inline asm + // inline asm + mov.u32 %r18, %envreg4; + // inline asm + // inline asm + mov.u32 %r19, %ntid.y; + // inline asm + // inline asm + mov.u32 %r20, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r21, %tid.y; + // inline asm + add.s32 %r45, %r21, %r18; + mad.lo.s32 %r46, %r20, %r19, %r45; + shl.b32 %r47, %r46, 12; + add.s32 %r48, %r26, %r47; + shl.b32 %r49, %r17, 3; + add.s32 %r50, %r49, %r48; + ld.global.v2.f32 {%f522, %f523}, [%r50+3072]; + // inline asm + mov.u32 %r22, %tid.x; + // inline asm + shl.b32 %r51, %r22, 3; + mov.u32 %r52, shr_3_lds; + add.s32 %r53, %r52, %r51; + st.shared.v2.f32 [%r53], {%f574, %f575}; + // inline asm + mov.u32 %r23, %tid.x; + // inline asm + shl.b32 %r54, %r23, 3; + add.s32 %r55, %r54, %r52; + st.shared.v2.f32 [%r55+1024], {%f516, %f517}; + // inline asm + mov.u32 %r24, %tid.x; + // inline asm + shl.b32 %r56, %r24, 3; + add.s32 %r57, %r56, %r52; + st.shared.v2.f32 [%r57+2048], {%f510, %f511}; + // inline asm + mov.u32 %r25, %tid.x; + // inline asm + shl.b32 %r58, %r25, 3; + add.s32 %r59, %r58, %r52; + st.shared.v2.f32 [%r59+3072], {%f522, %f523}; + bar.sync 0; + // inline asm + mov.u32 %r60, %tid.x; + // inline asm + setp.eq.s32 %p1, %r60, 0; + @%p1 bra BB0_2; + + // inline asm + mov.u32 %r61, %tid.x; + // inline asm + mov.u32 %r62, 512; + sub.s32 %r63, %r62, %r61; + shl.b32 %r64, %r63, 3; + add.s32 %r66, %r52, %r64; + ld.shared.v2.f32 {%f572, %f573}, [%r66]; + bra.uni BB0_3; + +BB0_2: + mov.f32 %f17, 0f00000000; + mov.f32 %f572, %f575; + mov.f32 %f573, %f17; + mov.f32 %f574, %f574; + mov.f32 %f575, %f17; + +BB0_3: + // inline asm + mov.u32 %r67, %tid.x; + // inline asm + mov.u32 %r70, 384; + sub.s32 %r71, %r70, %r67; + shl.b32 %r72, %r71, 3; + add.s32 %r74, %r52, %r72; + ld.shared.v2.f32 {%f490, %f491}, [%r74]; + // inline 
asm + mov.u32 %r68, %tid.x; + // inline asm + mov.u32 %r75, 256; + sub.s32 %r76, %r75, %r68; + shl.b32 %r77, %r76, 3; + add.s32 %r78, %r52, %r77; + ld.shared.v2.f32 {%f492, %f493}, [%r78]; + // inline asm + mov.u32 %r69, %tid.x; + // inline asm + mov.u32 %r79, 128; + sub.s32 %r80, %r79, %r69; + shl.b32 %r81, %r80, 3; + add.s32 %r82, %r52, %r81; + neg.ftz.f32 %f20, %f573; + neg.ftz.f32 %f22, %f491; + neg.ftz.f32 %f24, %f493; + ld.shared.v2.f32 {%f500, %f501}, [%r82]; + neg.ftz.f32 %f26, %f501; + add.ftz.f32 %f504, %f574, %f572; + add.ftz.f32 %f505, %f575, %f20; + sub.ftz.f32 %f506, %f574, %f572; + sub.ftz.f32 %f507, %f575, %f20; + add.ftz.f32 %f508, %f510, %f492; + add.ftz.f32 %f509, %f511, %f24; + sub.ftz.f32 %f512, %f510, %f492; + sub.ftz.f32 %f513, %f511, %f24; + add.ftz.f32 %f514, %f516, %f490; + add.ftz.f32 %f515, %f517, %f22; + sub.ftz.f32 %f518, %f516, %f490; + sub.ftz.f32 %f519, %f517, %f22; + add.ftz.f32 %f520, %f522, %f500; + add.ftz.f32 %f521, %f523, %f26; + sub.ftz.f32 %f524, %f522, %f500; + sub.ftz.f32 %f525, %f523, %f26; + add.ftz.f32 %f526, %f504, %f508; + add.ftz.f32 %f527, %f505, %f509; + sub.ftz.f32 %f528, %f504, %f508; + sub.ftz.f32 %f529, %f505, %f509; + neg.ftz.f32 %f28, %f513; + add.ftz.f32 %f534, %f506, %f28; + add.ftz.f32 %f535, %f507, %f512; + sub.ftz.f32 %f536, %f506, %f28; + sub.ftz.f32 %f537, %f507, %f512; + add.ftz.f32 %f538, %f514, %f520; + add.ftz.f32 %f539, %f515, %f521; + sub.ftz.f32 %f540, %f514, %f520; + sub.ftz.f32 %f541, %f515, %f521; + neg.ftz.f32 %f31, %f525; + add.ftz.f32 %f546, %f518, %f31; + add.ftz.f32 %f547, %f519, %f524; + sub.ftz.f32 %f548, %f518, %f31; + sub.ftz.f32 %f549, %f519, %f524; + add.ftz.f32 %f446, %f526, %f538; + add.ftz.f32 %f447, %f527, %f539; + sub.ftz.f32 %f454, %f526, %f538; + sub.ftz.f32 %f455, %f527, %f539; + neg.ftz.f32 %f34, %f547; + add.ftz.f32 %f554, %f34, %f546; + add.ftz.f32 %f555, %f546, %f547; + mov.f32 %f36, 0f3F3504F3; + fma.rn.ftz.f32 %f448, %f554, %f36, %f534; + fma.rn.ftz.f32 %f449, %f555, 
%f36, %f535; + neg.f32 %f558, %f554; + neg.f32 %f559, %f555; + fma.rn.ftz.f32 %f456, %f558, %f36, %f534; + fma.rn.ftz.f32 %f457, %f559, %f36, %f535; + neg.ftz.f32 %f38, %f541; + add.ftz.f32 %f450, %f528, %f38; + add.ftz.f32 %f451, %f529, %f540; + sub.ftz.f32 %f458, %f528, %f38; + sub.ftz.f32 %f459, %f529, %f540; + neg.ftz.f32 %f41, %f549; + sub.ftz.f32 %f568, %f41, %f548; + sub.ftz.f32 %f569, %f548, %f549; + fma.rn.ftz.f32 %f452, %f568, %f36, %f536; + fma.rn.ftz.f32 %f453, %f569, %f36, %f537; + neg.f32 %f570, %f568; + neg.f32 %f571, %f569; + fma.rn.ftz.f32 %f460, %f570, %f36, %f536; + fma.rn.ftz.f32 %f461, %f571, %f36, %f537; + bar.sync 0; + // inline asm + mov.u32 %r83, %tid.x; + // inline asm + shl.b32 %r84, %r83, 5; + add.s32 %r86, %r52, %r84; + st.shared.v4.f32 [%r86+16], {%f454, %f456, %f458, %f460}; + st.shared.v4.f32 [%r86], {%f446, %f448, %f450, %f452}; + bar.sync 0; + // inline asm + mov.u32 %r87, %tid.x; + // inline asm + shl.b32 %r95, %r87, 2; + add.s32 %r97, %r52, %r95; + ld.shared.f32 %f51, [%r97]; + // inline asm + mov.u32 %r88, %tid.x; + // inline asm + shl.b32 %r98, %r88, 2; + add.s32 %r99, %r98, %r52; + ld.shared.f32 %f1, [%r99+512]; + // inline asm + mov.u32 %r89, %tid.x; + // inline asm + shl.b32 %r100, %r89, 2; + add.s32 %r101, %r100, %r52; + ld.shared.f32 %f2, [%r101+1024]; + // inline asm + mov.u32 %r90, %tid.x; + // inline asm + shl.b32 %r102, %r90, 2; + add.s32 %r103, %r102, %r52; + ld.shared.f32 %f3, [%r103+1536]; + // inline asm + mov.u32 %r91, %tid.x; + // inline asm + shl.b32 %r104, %r91, 2; + add.s32 %r105, %r104, %r52; + ld.shared.f32 %f4, [%r105+2048]; + // inline asm + mov.u32 %r92, %tid.x; + // inline asm + shl.b32 %r106, %r92, 2; + add.s32 %r107, %r106, %r52; + ld.shared.f32 %f5, [%r107+2560]; + // inline asm + mov.u32 %r93, %tid.x; + // inline asm + shl.b32 %r108, %r93, 2; + add.s32 %r109, %r108, %r52; + ld.shared.f32 %f6, [%r109+3072]; + // inline asm + mov.u32 %r94, %tid.x; + // inline asm + shl.b32 %r110, %r94, 2; + add.s32 
%r111, %r110, %r52; + ld.shared.f32 %f7, [%r111+3584]; + bar.sync 0; + // inline asm + mov.u32 %r112, %tid.x; + // inline asm + shl.b32 %r113, %r112, 5; + add.s32 %r115, %r52, %r113; + st.shared.v4.f32 [%r115+16], {%f455, %f457, %f459, %f461}; + st.shared.v4.f32 [%r115], {%f447, %f449, %f451, %f453}; + bar.sync 0; + // inline asm + mov.u32 %r116, %tid.x; + // inline asm + shl.b32 %r124, %r116, 2; + add.s32 %r126, %r52, %r124; + ld.shared.f32 %f61, [%r126]; + // inline asm + mov.u32 %r117, %tid.x; + // inline asm + shl.b32 %r127, %r117, 2; + add.s32 %r128, %r127, %r52; + ld.shared.f32 %f8, [%r128+512]; + // inline asm + mov.u32 %r118, %tid.x; + // inline asm + shl.b32 %r129, %r118, 2; + add.s32 %r130, %r129, %r52; + ld.shared.f32 %f9, [%r130+1024]; + // inline asm + mov.u32 %r119, %tid.x; + // inline asm + shl.b32 %r131, %r119, 2; + add.s32 %r132, %r131, %r52; + ld.shared.f32 %f10, [%r132+1536]; + // inline asm + mov.u32 %r120, %tid.x; + // inline asm + shl.b32 %r133, %r120, 2; + add.s32 %r134, %r133, %r52; + ld.shared.f32 %f11, [%r134+2048]; + // inline asm + mov.u32 %r121, %tid.x; + // inline asm + shl.b32 %r135, %r121, 2; + add.s32 %r136, %r135, %r52; + ld.shared.f32 %f12, [%r136+2560]; + // inline asm + mov.u32 %r122, %tid.x; + // inline asm + shl.b32 %r137, %r122, 2; + add.s32 %r138, %r137, %r52; + ld.shared.f32 %f13, [%r138+3072]; + // inline asm + mov.u32 %r123, %tid.x; + // inline asm + shl.b32 %r139, %r123, 2; + add.s32 %r140, %r139, %r52; + ld.shared.f32 %f14, [%r140+3584]; + bar.sync 0; + mov.f32 %f90, 0f40000000; + mul.ftz.f32 %f15, %f90, 0f40490FDB; + div.rn.ftz.f32 %f91, %f15, 0f42800000; + // inline asm + mov.u32 %r141, %tid.x; + // inline asm + and.b32 %r144, %r141, 7; + cvt.rn.f32.u32 %f92, %r144; + mul.ftz.f32 %f65, %f91, %f92; + // inline asm + cos.approx.f32 %f62, %f65; + // inline asm + // inline asm + sin.approx.f32 %f64, %f65; + // inline asm + mul.ftz.f32 %f93, %f62, %f1; + neg.f32 %f94, %f64; + fma.rn.ftz.f32 %f95, %f94, %f8, %f93; + 
mul.ftz.f32 %f96, %f64, %f1; + fma.rn.ftz.f32 %f97, %f62, %f8, %f96; + fma.rn.ftz.f32 %f67, %f91, %f92, %f65; + // inline asm + cos.approx.f32 %f66, %f67; + // inline asm + // inline asm + sin.approx.f32 %f68, %f67; + // inline asm + mul.ftz.f32 %f98, %f66, %f2; + neg.f32 %f99, %f68; + fma.rn.ftz.f32 %f100, %f99, %f9, %f98; + mul.ftz.f32 %f101, %f68, %f2; + fma.rn.ftz.f32 %f102, %f66, %f9, %f101; + mul.ftz.f32 %f73, %f65, 0f40400000; + // inline asm + cos.approx.f32 %f70, %f73; + // inline asm + // inline asm + sin.approx.f32 %f72, %f73; + // inline asm + mul.ftz.f32 %f103, %f70, %f3; + neg.f32 %f104, %f72; + fma.rn.ftz.f32 %f105, %f104, %f10, %f103; + mul.ftz.f32 %f106, %f72, %f3; + fma.rn.ftz.f32 %f107, %f70, %f10, %f106; + mul.ftz.f32 %f77, %f65, 0f40800000; + // inline asm + cos.approx.f32 %f74, %f77; + // inline asm + // inline asm + sin.approx.f32 %f76, %f77; + // inline asm + mul.ftz.f32 %f108, %f74, %f4; + neg.f32 %f109, %f76; + fma.rn.ftz.f32 %f110, %f109, %f11, %f108; + mul.ftz.f32 %f111, %f76, %f4; + fma.rn.ftz.f32 %f112, %f74, %f11, %f111; + mul.ftz.f32 %f81, %f65, 0f40A00000; + // inline asm + cos.approx.f32 %f78, %f81; + // inline asm + // inline asm + sin.approx.f32 %f80, %f81; + // inline asm + mul.ftz.f32 %f113, %f78, %f5; + neg.f32 %f114, %f80; + fma.rn.ftz.f32 %f115, %f114, %f12, %f113; + mul.ftz.f32 %f116, %f80, %f5; + fma.rn.ftz.f32 %f117, %f78, %f12, %f116; + mul.ftz.f32 %f85, %f65, 0f40C00000; + // inline asm + cos.approx.f32 %f82, %f85; + // inline asm + // inline asm + sin.approx.f32 %f84, %f85; + // inline asm + mul.ftz.f32 %f118, %f82, %f6; + neg.f32 %f119, %f84; + fma.rn.ftz.f32 %f120, %f119, %f13, %f118; + mul.ftz.f32 %f121, %f84, %f6; + fma.rn.ftz.f32 %f122, %f82, %f13, %f121; + mul.ftz.f32 %f89, %f65, 0f40E00000; + // inline asm + cos.approx.f32 %f86, %f89; + // inline asm + // inline asm + sin.approx.f32 %f88, %f89; + // inline asm + mul.ftz.f32 %f123, %f86, %f7; + neg.f32 %f124, %f88; + fma.rn.ftz.f32 %f125, %f124, %f14, %f123; + 
mul.ftz.f32 %f126, %f88, %f7; + fma.rn.ftz.f32 %f127, %f86, %f14, %f126; + add.ftz.f32 %f380, %f51, %f110; + add.ftz.f32 %f381, %f61, %f112; + sub.ftz.f32 %f384, %f51, %f110; + sub.ftz.f32 %f385, %f61, %f112; + add.ftz.f32 %f386, %f100, %f120; + add.ftz.f32 %f387, %f102, %f122; + sub.ftz.f32 %f388, %f100, %f120; + sub.ftz.f32 %f389, %f102, %f122; + add.ftz.f32 %f390, %f95, %f115; + add.ftz.f32 %f391, %f97, %f117; + sub.ftz.f32 %f392, %f95, %f115; + sub.ftz.f32 %f393, %f97, %f117; + add.ftz.f32 %f394, %f105, %f125; + add.ftz.f32 %f395, %f107, %f127; + sub.ftz.f32 %f396, %f105, %f125; + sub.ftz.f32 %f397, %f107, %f127; + add.ftz.f32 %f398, %f380, %f386; + add.ftz.f32 %f399, %f381, %f387; + sub.ftz.f32 %f400, %f380, %f386; + sub.ftz.f32 %f401, %f381, %f387; + neg.ftz.f32 %f129, %f389; + add.ftz.f32 %f406, %f384, %f129; + add.ftz.f32 %f407, %f385, %f388; + sub.ftz.f32 %f408, %f384, %f129; + sub.ftz.f32 %f409, %f385, %f388; + add.ftz.f32 %f410, %f390, %f394; + add.ftz.f32 %f411, %f391, %f395; + sub.ftz.f32 %f412, %f390, %f394; + sub.ftz.f32 %f413, %f391, %f395; + neg.ftz.f32 %f132, %f397; + add.ftz.f32 %f418, %f392, %f132; + add.ftz.f32 %f419, %f393, %f396; + sub.ftz.f32 %f420, %f392, %f132; + sub.ftz.f32 %f421, %f393, %f396; + add.ftz.f32 %f348, %f398, %f410; + add.ftz.f32 %f349, %f399, %f411; + sub.ftz.f32 %f356, %f398, %f410; + sub.ftz.f32 %f357, %f399, %f411; + neg.ftz.f32 %f135, %f419; + add.ftz.f32 %f426, %f135, %f418; + add.ftz.f32 %f427, %f418, %f419; + fma.rn.ftz.f32 %f350, %f426, %f36, %f406; + fma.rn.ftz.f32 %f351, %f427, %f36, %f407; + neg.f32 %f430, %f426; + neg.f32 %f431, %f427; + fma.rn.ftz.f32 %f358, %f430, %f36, %f406; + fma.rn.ftz.f32 %f359, %f431, %f36, %f407; + neg.ftz.f32 %f139, %f413; + add.ftz.f32 %f352, %f400, %f139; + add.ftz.f32 %f353, %f401, %f412; + sub.ftz.f32 %f360, %f400, %f139; + sub.ftz.f32 %f361, %f401, %f412; + neg.ftz.f32 %f142, %f421; + sub.ftz.f32 %f440, %f142, %f420; + sub.ftz.f32 %f441, %f420, %f421; + fma.rn.ftz.f32 %f354, %f440, 
%f36, %f408; + fma.rn.ftz.f32 %f355, %f441, %f36, %f409; + neg.f32 %f442, %f440; + neg.f32 %f443, %f441; + fma.rn.ftz.f32 %f362, %f442, %f36, %f408; + fma.rn.ftz.f32 %f363, %f443, %f36, %f409; + // inline asm + mov.u32 %r142, %tid.x; + // inline asm + shl.b32 %r145, %r142, 3; + // inline asm + mov.u32 %r143, %tid.x; + // inline asm + and.b32 %r146, %r143, 7; + and.b32 %r147, %r145, 1073741760; + add.s32 %r148, %r147, %r146; + shl.b32 %r149, %r148, 2; + add.s32 %r1, %r52, %r149; + st.shared.f32 [%r1], %f348; + st.shared.f32 [%r1+32], %f350; + st.shared.f32 [%r1+64], %f352; + st.shared.f32 [%r1+96], %f354; + st.shared.f32 [%r1+128], %f356; + st.shared.f32 [%r1+160], %f358; + st.shared.f32 [%r1+192], %f360; + st.shared.f32 [%r1+224], %f362; + bar.sync 0; + // inline asm + mov.u32 %r151, %tid.x; + // inline asm + shl.b32 %r155, %r151, 3; + add.s32 %r157, %r52, %r155; + ld.shared.v2.f32 {%f344, %f345}, [%r157]; + // inline asm + mov.u32 %r152, %tid.x; + // inline asm + shl.b32 %r158, %r152, 3; + add.s32 %r159, %r158, %r52; + ld.shared.v2.f32 {%f282, %f283}, [%r159+1024]; + // inline asm + mov.u32 %r153, %tid.x; + // inline asm + shl.b32 %r160, %r153, 3; + add.s32 %r161, %r160, %r52; + ld.shared.v2.f32 {%f288, %f289}, [%r161+2048]; + // inline asm + mov.u32 %r154, %tid.x; + // inline asm + shl.b32 %r162, %r154, 3; + add.s32 %r163, %r162, %r52; + ld.shared.v2.f32 {%f294, %f295}, [%r163+3072]; + bar.sync 0; + st.shared.f32 [%r1], %f349; + st.shared.f32 [%r1+32], %f351; + st.shared.f32 [%r1+64], %f353; + st.shared.f32 [%r1+96], %f355; + st.shared.f32 [%r1+128], %f357; + st.shared.f32 [%r1+160], %f359; + st.shared.f32 [%r1+192], %f361; + st.shared.f32 [%r1+224], %f363; + bar.sync 0; + // inline asm + mov.u32 %r164, %tid.x; + // inline asm + shl.b32 %r168, %r164, 3; + add.s32 %r170, %r52, %r168; + ld.shared.v2.f32 {%f342, %f343}, [%r170]; + // inline asm + mov.u32 %r165, %tid.x; + // inline asm + shl.b32 %r171, %r165, 3; + add.s32 %r172, %r171, %r52; + ld.shared.v2.f32 
{%f284, %f285}, [%r172+1024]; + // inline asm + mov.u32 %r166, %tid.x; + // inline asm + shl.b32 %r173, %r166, 3; + add.s32 %r174, %r173, %r52; + ld.shared.v2.f32 {%f290, %f291}, [%r174+2048]; + // inline asm + mov.u32 %r167, %tid.x; + // inline asm + shl.b32 %r175, %r167, 3; + add.s32 %r176, %r175, %r52; + ld.shared.v2.f32 {%f296, %f297}, [%r176+3072]; + bar.sync 0; + div.rn.ftz.f32 %f184, %f15, 0f43800000; + // inline asm + mov.u32 %r177, %tid.x; + // inline asm + shl.b32 %r180, %r177, 1; + and.b32 %r181, %r180, 62; + cvt.rn.f32.u32 %f185, %r181; + mul.ftz.f32 %f163, %f184, %f185; + // inline asm + cos.approx.f32 %f160, %f163; + // inline asm + // inline asm + sin.approx.f32 %f162, %f163; + // inline asm + mul.ftz.f32 %f187, %f160, %f282; + neg.f32 %f189, %f162; + fma.rn.ftz.f32 %f190, %f189, %f284, %f187; + mul.ftz.f32 %f191, %f162, %f282; + fma.rn.ftz.f32 %f192, %f160, %f284, %f191; + fma.rn.ftz.f32 %f165, %f184, %f185, %f163; + // inline asm + cos.approx.f32 %f164, %f165; + // inline asm + // inline asm + sin.approx.f32 %f166, %f165; + // inline asm + mul.ftz.f32 %f194, %f164, %f288; + neg.f32 %f196, %f166; + fma.rn.ftz.f32 %f197, %f196, %f290, %f194; + mul.ftz.f32 %f198, %f166, %f288; + fma.rn.ftz.f32 %f199, %f164, %f290, %f198; + mul.ftz.f32 %f171, %f163, 0f40400000; + // inline asm + cos.approx.f32 %f168, %f171; + // inline asm + // inline asm + sin.approx.f32 %f170, %f171; + // inline asm + mul.ftz.f32 %f201, %f168, %f294; + neg.f32 %f203, %f170; + fma.rn.ftz.f32 %f204, %f203, %f296, %f201; + mul.ftz.f32 %f205, %f170, %f294; + fma.rn.ftz.f32 %f206, %f168, %f296, %f205; + fma.rn.ftz.f32 %f175, %f184, %f185, %f184; + // inline asm + cos.approx.f32 %f172, %f175; + // inline asm + // inline asm + sin.approx.f32 %f174, %f175; + // inline asm + mul.ftz.f32 %f208, %f172, %f283; + neg.f32 %f210, %f174; + fma.rn.ftz.f32 %f211, %f210, %f285, %f208; + mul.ftz.f32 %f212, %f174, %f283; + fma.rn.ftz.f32 %f213, %f172, %f285, %f212; + add.ftz.f32 %f177, %f175, %f175; + // 
inline asm + cos.approx.f32 %f176, %f177; + // inline asm + // inline asm + sin.approx.f32 %f178, %f177; + // inline asm + mul.ftz.f32 %f215, %f176, %f289; + neg.f32 %f217, %f178; + fma.rn.ftz.f32 %f218, %f217, %f291, %f215; + mul.ftz.f32 %f219, %f178, %f289; + fma.rn.ftz.f32 %f220, %f176, %f291, %f219; + mul.ftz.f32 %f183, %f175, 0f40400000; + // inline asm + cos.approx.f32 %f180, %f183; + // inline asm + // inline asm + sin.approx.f32 %f182, %f183; + // inline asm + mul.ftz.f32 %f222, %f180, %f295; + neg.f32 %f224, %f182; + fma.rn.ftz.f32 %f225, %f224, %f297, %f222; + mul.ftz.f32 %f226, %f182, %f295; + fma.rn.ftz.f32 %f227, %f180, %f297, %f226; + add.ftz.f32 %f306, %f344, %f197; + add.ftz.f32 %f307, %f342, %f199; + sub.ftz.f32 %f310, %f344, %f197; + sub.ftz.f32 %f311, %f342, %f199; + add.ftz.f32 %f312, %f190, %f204; + add.ftz.f32 %f313, %f192, %f206; + sub.ftz.f32 %f314, %f190, %f204; + sub.ftz.f32 %f315, %f192, %f206; + add.ftz.f32 %f260, %f306, %f312; + add.ftz.f32 %f261, %f307, %f313; + sub.ftz.f32 %f272, %f306, %f312; + sub.ftz.f32 %f273, %f307, %f313; + neg.ftz.f32 %f229, %f315; + add.ftz.f32 %f266, %f310, %f229; + add.ftz.f32 %f267, %f311, %f314; + sub.ftz.f32 %f278, %f310, %f229; + sub.ftz.f32 %f279, %f311, %f314; + add.ftz.f32 %f320, %f345, %f218; + add.ftz.f32 %f321, %f343, %f220; + sub.ftz.f32 %f324, %f345, %f218; + sub.ftz.f32 %f325, %f343, %f220; + add.ftz.f32 %f326, %f211, %f225; + add.ftz.f32 %f327, %f213, %f227; + sub.ftz.f32 %f328, %f211, %f225; + sub.ftz.f32 %f329, %f213, %f227; + add.ftz.f32 %f262, %f320, %f326; + add.ftz.f32 %f263, %f321, %f327; + sub.ftz.f32 %f274, %f320, %f326; + sub.ftz.f32 %f275, %f321, %f327; + neg.ftz.f32 %f232, %f329; + add.ftz.f32 %f268, %f324, %f232; + add.ftz.f32 %f269, %f325, %f328; + sub.ftz.f32 %f280, %f324, %f232; + sub.ftz.f32 %f281, %f325, %f328; + // inline asm + mov.u32 %r178, %tid.x; + // inline asm + shl.b32 %r182, %r178, 2; + // inline asm + mov.u32 %r179, %tid.x; + // inline asm + and.b32 %r183, %r179, 31; 
+ and.b32 %r184, %r182, 536870784; + add.s32 %r185, %r184, %r183; + shl.b32 %r186, %r185, 3; + add.s32 %r188, %r52, %r186; + st.shared.v2.f32 [%r188], {%f260, %f262}; + st.shared.v2.f32 [%r188+256], {%f266, %f268}; + st.shared.v2.f32 [%r188+512], {%f272, %f274}; + st.shared.v2.f32 [%r188+768], {%f278, %f280}; + bar.sync 0; + // inline asm + mov.u32 %r193, %tid.x; + // inline asm + shl.b32 %r197, %r193, 3; + add.s32 %r199, %r52, %r197; + st.shared.v2.f32 [%r199], {%f261, %f263}; + // inline asm + mov.u32 %r194, %tid.x; + // inline asm + shl.b32 %r200, %r194, 3; + add.s32 %r201, %r200, %r52; + st.shared.v2.f32 [%r201+1024], {%f267, %f269}; + // inline asm + mov.u32 %r195, %tid.x; + // inline asm + shl.b32 %r202, %r195, 3; + add.s32 %r203, %r202, %r52; + st.shared.v2.f32 [%r203+2048], {%f273, %f275}; + // inline asm + mov.u32 %r196, %tid.x; + // inline asm + shl.b32 %r204, %r196, 3; + add.s32 %r205, %r204, %r52; + st.shared.v2.f32 [%r205+3072], {%f279, %f281}; + bar.sync 0; + ret; +} + + diff --git a/RTCP/GPUProc/src/UHEP/InvFIR.cl b/RTCP/GPUProc/src/UHEP/InvFIR.cl new file mode 100644 index 0000000000000000000000000000000000000000..1bcaaf72790a9e3b9334b40397ec4e64a84e40e4 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvFIR.cl @@ -0,0 +1,345 @@
+// Output of the FIR stage: [tab][pol][time][sub_time], 1024 sub_time entries per time step.
+typedef __global float (*InvFIRredDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND][1024];
+// Input of the FIR stage: same layout, but NR_STATION_FILTER_TAPS - 1 extra time
+// steps so that the last output still has a full window of samples to read.
+typedef __global float (*InvFFTedDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1][1024];
+// One float16 of filter coefficients per sub_time index.
+typedef __global const float16 (*WeightsType)[1024];
+
+
+// Applies a 16-tap FIR filter along the time axis.
+//
+// One work-item handles one (sub_time, pol, tab) triple, taken from global ids
+// 0, 1 and 2 respectively.  For every output time step t it computes
+//
+//   out[tab][pol][t][sub_time] = SUM(i = 0..15) weights.s(15 - i) * in[tab][pol][t + i][sub_time]
+//
+// i.e. weights.sF multiplies the oldest sample of the window and weights.s0
+// the newest one.
+//
+// Parameters:
+//   invFIRredDataPtr  output buffer, accessed as InvFIRredDataType
+//   invFFTedDataPtr   input buffer, accessed as InvFFTedDataType
+//   weightsPtr        filter coefficients, accessed as WeightsType
+//
+// NOTE(review): the body is hand-unrolled for exactly 16 taps (float16 delay
+// line, 15 preloaded samples, 16 outputs per loop iteration).  It therefore
+// assumes NR_STATION_FILTER_TAPS == 16 and NR_SAMPLES_PER_SUBBAND to be a
+// multiple of 16 -- confirm against the host-side build options.
+__kernel void invFIRfilter(__global void *invFIRredDataPtr,
+                           __global const void *invFFTedDataPtr,
+                           __global const void *weightsPtr)
+{
+  InvFIRredDataType invFIRredData = (InvFIRredDataType) invFIRredDataPtr;
+  InvFFTedDataType  invFFTedData  = (InvFFTedDataType) invFFTedDataPtr;
+  WeightsType       weightsData   = (WeightsType) weightsPtr;
+
+  uint sub_time = get_global_id(0);
+  uint pol      = get_global_id(1);
+  uint tab      = get_global_id(2);
+
+//#pragma OPENCL EXTENSION cl_amd_printf : enable
+
+  const float16 weights = (*weightsData)[sub_time];
+  float16 delayLine;
+  float16 sum;
+
+  // Prime the delay line with the first 15 input samples; the 16th slot (sF)
+  // is filled at the top of the first loop iteration.
+  delayLine.s0 = (*invFFTedData)[tab][pol][ 0][sub_time];
+  delayLine.s1 = (*invFFTedData)[tab][pol][ 1][sub_time];
+  delayLine.s2 = (*invFFTedData)[tab][pol][ 2][sub_time];
+  delayLine.s3 = (*invFFTedData)[tab][pol][ 3][sub_time];
+  delayLine.s4 = (*invFFTedData)[tab][pol][ 4][sub_time];
+  delayLine.s5 = (*invFFTedData)[tab][pol][ 5][sub_time];
+  delayLine.s6 = (*invFFTedData)[tab][pol][ 6][sub_time];
+  delayLine.s7 = (*invFFTedData)[tab][pol][ 7][sub_time];
+  delayLine.s8 = (*invFFTedData)[tab][pol][ 8][sub_time];
+  delayLine.s9 = (*invFFTedData)[tab][pol][ 9][sub_time];
+  delayLine.sA = (*invFFTedData)[tab][pol][10][sub_time];
+  delayLine.sB = (*invFFTedData)[tab][pol][11][sub_time];
+  delayLine.sC = (*invFFTedData)[tab][pol][12][sub_time];
+  delayLine.sD = (*invFFTedData)[tab][pol][13][sub_time];
+  delayLine.sE = (*invFFTedData)[tab][pol][14][sub_time];
+
+  // The delay line is used as a ring buffer: in step j the oldest sample sits
+  // in slot j.  As soon as that slot has been consumed (first product of the
+  // step) it is refilled with the sample needed 16 steps later.  Each refill
+  // must therefore target slot j -- writing all of them to sF, as an earlier
+  // revision did, leaves s0..sE frozen at the primed values.
+  for (uint time = 0; time < NR_SAMPLES_PER_SUBBAND; time += NR_STATION_FILTER_TAPS) {
+    delayLine.sF = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 0][sub_time];
+
+    sum.s0  = weights.sF * delayLine.s0;
+    delayLine.s0 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 1][sub_time];
+    sum.s0 += weights.sE * delayLine.s1;
+    sum.s0 += weights.sD * delayLine.s2;
+    sum.s0 += weights.sC * delayLine.s3;
+    sum.s0 += weights.sB * delayLine.s4;
+    sum.s0 += weights.sA * delayLine.s5;
+    sum.s0 += weights.s9 * delayLine.s6;
+    sum.s0 += weights.s8 * delayLine.s7;
+    sum.s0 += weights.s7 * delayLine.s8;
+    sum.s0 += weights.s6 * delayLine.s9;
+    sum.s0 += weights.s5 * delayLine.sA;
+    sum.s0 += weights.s4 * delayLine.sB;
+    sum.s0 += weights.s3 * delayLine.sC;
+    sum.s0 += weights.s2 * delayLine.sD;
+    sum.s0 += weights.s1 * delayLine.sE;
+    sum.s0 += weights.s0 * delayLine.sF;
+    (*invFIRredData)[tab][pol][time + 0][sub_time] = sum.s0;
+
+    sum.s1  = weights.sF * delayLine.s1;
+    delayLine.s1 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 2][sub_time];
+    sum.s1 += weights.sE * delayLine.s2;
+    sum.s1 += weights.sD * delayLine.s3;
+    sum.s1 += weights.sC * delayLine.s4;
+    sum.s1 += weights.sB * delayLine.s5;
+    sum.s1 += weights.sA * delayLine.s6;
+    sum.s1 += weights.s9 * delayLine.s7;
+    sum.s1 += weights.s8 * delayLine.s8;
+    sum.s1 += weights.s7 * delayLine.s9;
+    sum.s1 += weights.s6 * delayLine.sA;
+    sum.s1 += weights.s5 * delayLine.sB;
+    sum.s1 += weights.s4 * delayLine.sC;
+    sum.s1 += weights.s3 * delayLine.sD;
+    sum.s1 += weights.s2 * delayLine.sE;
+    sum.s1 += weights.s1 * delayLine.sF;
+    sum.s1 += weights.s0 * delayLine.s0;
+    (*invFIRredData)[tab][pol][time + 1][sub_time] = sum.s1;
+
+    sum.s2  = weights.sF * delayLine.s2;
+    delayLine.s2 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 3][sub_time];
+    sum.s2 += weights.sE * delayLine.s3;
+    sum.s2 += weights.sD * delayLine.s4;
+    sum.s2 += weights.sC * delayLine.s5;
+    sum.s2 += weights.sB * delayLine.s6;
+    sum.s2 += weights.sA * delayLine.s7;
+    sum.s2 += weights.s9 * delayLine.s8;
+    sum.s2 += weights.s8 * delayLine.s9;
+    sum.s2 += weights.s7 * delayLine.sA;
+    sum.s2 += weights.s6 * delayLine.sB;
+    sum.s2 += weights.s5 * delayLine.sC;
+    sum.s2 += weights.s4 * delayLine.sD;
+    sum.s2 += weights.s3 * delayLine.sE;
+    sum.s2 += weights.s2 * delayLine.sF;
+    sum.s2 += weights.s1 * delayLine.s0;
+    sum.s2 += weights.s0 * delayLine.s1;
+    (*invFIRredData)[tab][pol][time + 2][sub_time] = sum.s2;
+
+    sum.s3  = weights.sF * delayLine.s3;
+    delayLine.s3 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 4][sub_time];
+    sum.s3 += weights.sE * delayLine.s4;
+    sum.s3 += weights.sD * delayLine.s5;
+    sum.s3 += weights.sC * delayLine.s6;
+    sum.s3 += weights.sB * delayLine.s7;
+    sum.s3 += weights.sA * delayLine.s8;
+    sum.s3 += weights.s9 * delayLine.s9;
+    sum.s3 += weights.s8 * delayLine.sA;
+    sum.s3 += weights.s7 * delayLine.sB;
+    sum.s3 += weights.s6 * delayLine.sC;
+    sum.s3 += weights.s5 * delayLine.sD;
+    sum.s3 += weights.s4 * delayLine.sE;
+    sum.s3 += weights.s3 * delayLine.sF;
+    sum.s3 += weights.s2 * delayLine.s0;
+    sum.s3 += weights.s1 * delayLine.s1;
+    sum.s3 += weights.s0 * delayLine.s2;
+    (*invFIRredData)[tab][pol][time + 3][sub_time] = sum.s3;
+
+    sum.s4  = weights.sF * delayLine.s4;
+    delayLine.s4 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 5][sub_time];
+    sum.s4 += weights.sE * delayLine.s5;
+    sum.s4 += weights.sD * delayLine.s6;
+    sum.s4 += weights.sC * delayLine.s7;
+    sum.s4 += weights.sB * delayLine.s8;
+    sum.s4 += weights.sA * delayLine.s9;
+    sum.s4 += weights.s9 * delayLine.sA;
+    sum.s4 += weights.s8 * delayLine.sB;
+    sum.s4 += weights.s7 * delayLine.sC;
+    sum.s4 += weights.s6 * delayLine.sD;
+    sum.s4 += weights.s5 * delayLine.sE;
+    sum.s4 += weights.s4 * delayLine.sF;
+    sum.s4 += weights.s3 * delayLine.s0;
+    sum.s4 += weights.s2 * delayLine.s1;
+    sum.s4 += weights.s1 * delayLine.s2;
+    sum.s4 += weights.s0 * delayLine.s3;
+    (*invFIRredData)[tab][pol][time + 4][sub_time] = sum.s4;
+
+    sum.s5  = weights.sF * delayLine.s5;
+    delayLine.s5 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 6][sub_time];
+    sum.s5 += weights.sE * delayLine.s6;
+    sum.s5 += weights.sD * delayLine.s7;
+    sum.s5 += weights.sC * delayLine.s8;
+    sum.s5 += weights.sB * delayLine.s9;
+    sum.s5 += weights.sA * delayLine.sA;
+    sum.s5 += weights.s9 * delayLine.sB;
+    sum.s5 += weights.s8 * delayLine.sC;
+    sum.s5 += weights.s7 * delayLine.sD;
+    sum.s5 += weights.s6 * delayLine.sE;
+    sum.s5 += weights.s5 * delayLine.sF;
+    sum.s5 += weights.s4 * delayLine.s0;
+    sum.s5 += weights.s3 * delayLine.s1;
+    sum.s5 += weights.s2 * delayLine.s2;
+    sum.s5 += weights.s1 * delayLine.s3;
+    sum.s5 += weights.s0 * delayLine.s4;
+    (*invFIRredData)[tab][pol][time + 5][sub_time] = sum.s5;
+
+    sum.s6  = weights.sF * delayLine.s6;
+    delayLine.s6 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 7][sub_time];
+    sum.s6 += weights.sE * delayLine.s7;
+    sum.s6 += weights.sD * delayLine.s8;
+    sum.s6 += weights.sC * delayLine.s9;
+    sum.s6 += weights.sB * delayLine.sA;
+    sum.s6 += weights.sA * delayLine.sB;
+    sum.s6 += weights.s9 * delayLine.sC;
+    sum.s6 += weights.s8 * delayLine.sD;
+    sum.s6 += weights.s7 * delayLine.sE;
+    sum.s6 += weights.s6 * delayLine.sF;
+    sum.s6 += weights.s5 * delayLine.s0;
+    sum.s6 += weights.s4 * delayLine.s1;
+    sum.s6 += weights.s3 * delayLine.s2;
+    sum.s6 += weights.s2 * delayLine.s3;
+    sum.s6 += weights.s1 * delayLine.s4;
+    sum.s6 += weights.s0 * delayLine.s5;
+    (*invFIRredData)[tab][pol][time + 6][sub_time] = sum.s6;
+
+    sum.s7  = weights.sF * delayLine.s7;
+    delayLine.s7 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 8][sub_time];
+    sum.s7 += weights.sE * delayLine.s8;
+    sum.s7 += weights.sD * delayLine.s9;
+    sum.s7 += weights.sC * delayLine.sA;
+    sum.s7 += weights.sB * delayLine.sB;
+    sum.s7 += weights.sA * delayLine.sC;
+    sum.s7 += weights.s9 * delayLine.sD;
+    sum.s7 += weights.s8 * delayLine.sE;
+    sum.s7 += weights.s7 * delayLine.sF;
+    sum.s7 += weights.s6 * delayLine.s0;
+    sum.s7 += weights.s5 * delayLine.s1;
+    sum.s7 += weights.s4 * delayLine.s2;
+    sum.s7 += weights.s3 * delayLine.s3;
+    sum.s7 += weights.s2 * delayLine.s4;
+    sum.s7 += weights.s1 * delayLine.s5;
+    sum.s7 += weights.s0 * delayLine.s6;
+    (*invFIRredData)[tab][pol][time + 7][sub_time] = sum.s7;
+
+    sum.s8  = weights.sF * delayLine.s8;
+    delayLine.s8 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 9][sub_time];
+    sum.s8 += weights.sE * delayLine.s9;
+    sum.s8 += weights.sD * delayLine.sA;
+    sum.s8 += weights.sC * delayLine.sB;
+    sum.s8 += weights.sB * delayLine.sC;
+    sum.s8 += weights.sA * delayLine.sD;
+    sum.s8 += weights.s9 * delayLine.sE;
+    sum.s8 += weights.s8 * delayLine.sF;
+    sum.s8 += weights.s7 * delayLine.s0;
+    sum.s8 += weights.s6 * delayLine.s1;
+    sum.s8 += weights.s5 * delayLine.s2;
+    sum.s8 += weights.s4 * delayLine.s3;
+    sum.s8 += weights.s3 * delayLine.s4;
+    sum.s8 += weights.s2 * delayLine.s5;
+    sum.s8 += weights.s1 * delayLine.s6;
+    sum.s8 += weights.s0 * delayLine.s7;
+    (*invFIRredData)[tab][pol][time + 8][sub_time] = sum.s8;
+
+    sum.s9  = weights.sF * delayLine.s9;
+    delayLine.s9 = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 10][sub_time];
+    sum.s9 += weights.sE * delayLine.sA;
+    sum.s9 += weights.sD * delayLine.sB;
+    sum.s9 += weights.sC * delayLine.sC;
+    sum.s9 += weights.sB * delayLine.sD;
+    sum.s9 += weights.sA * delayLine.sE;
+    sum.s9 += weights.s9 * delayLine.sF;
+    sum.s9 += weights.s8 * delayLine.s0;
+    sum.s9 += weights.s7 * delayLine.s1;
+    sum.s9 += weights.s6 * delayLine.s2;
+    sum.s9 += weights.s5 * delayLine.s3;
+    sum.s9 += weights.s4 * delayLine.s4;
+    sum.s9 += weights.s3 * delayLine.s5;
+    sum.s9 += weights.s2 * delayLine.s6;
+    sum.s9 += weights.s1 * delayLine.s7;
+    sum.s9 += weights.s0 * delayLine.s8;
+    (*invFIRredData)[tab][pol][time + 9][sub_time] = sum.s9;
+
+    sum.sA  = weights.sF * delayLine.sA;
+    delayLine.sA = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 11][sub_time];
+    sum.sA += weights.sE * delayLine.sB;
+    sum.sA += weights.sD * delayLine.sC;
+    sum.sA += weights.sC * delayLine.sD;
+    sum.sA += weights.sB * delayLine.sE;
+    sum.sA += weights.sA * delayLine.sF;
+    sum.sA += weights.s9 * delayLine.s0;
+    sum.sA += weights.s8 * delayLine.s1;
+    sum.sA += weights.s7 * delayLine.s2;
+    sum.sA += weights.s6 * delayLine.s3;
+    sum.sA += weights.s5 * delayLine.s4;
+    sum.sA += weights.s4 * delayLine.s5;
+    sum.sA += weights.s3 * delayLine.s6;
+    sum.sA += weights.s2 * delayLine.s7;
+    sum.sA += weights.s1 * delayLine.s8;
+    sum.sA += weights.s0 * delayLine.s9;
+    (*invFIRredData)[tab][pol][time + 10][sub_time] = sum.sA;
+
+    sum.sB  = weights.sF * delayLine.sB;
+    delayLine.sB = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 12][sub_time];
+    sum.sB += weights.sE * delayLine.sC;
+    sum.sB += weights.sD * delayLine.sD;
+    sum.sB += weights.sC * delayLine.sE;
+    sum.sB += weights.sB * delayLine.sF;
+    sum.sB += weights.sA * delayLine.s0;
+    sum.sB += weights.s9 * delayLine.s1;
+    sum.sB += weights.s8 * delayLine.s2;
+    sum.sB += weights.s7 * delayLine.s3;
+    sum.sB += weights.s6 * delayLine.s4;
+    sum.sB += weights.s5 * delayLine.s5;
+    sum.sB += weights.s4 * delayLine.s6;
+    sum.sB += weights.s3 * delayLine.s7;
+    sum.sB += weights.s2 * delayLine.s8;
+    sum.sB += weights.s1 * delayLine.s9;
+    sum.sB += weights.s0 * delayLine.sA;
+    (*invFIRredData)[tab][pol][time + 11][sub_time] = sum.sB;
+
+    sum.sC  = weights.sF * delayLine.sC;
+    delayLine.sC = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 13][sub_time];
+    sum.sC += weights.sE * delayLine.sD;
+    sum.sC += weights.sD * delayLine.sE;
+    sum.sC += weights.sC * delayLine.sF;
+    sum.sC += weights.sB * delayLine.s0;
+    sum.sC += weights.sA * delayLine.s1;
+    sum.sC += weights.s9 * delayLine.s2;
+    sum.sC += weights.s8 * delayLine.s3;
+    sum.sC += weights.s7 * delayLine.s4;
+    sum.sC += weights.s6 * delayLine.s5;
+    sum.sC += weights.s5 * delayLine.s6;
+    sum.sC += weights.s4 * delayLine.s7;
+    sum.sC += weights.s3 * delayLine.s8;
+    sum.sC += weights.s2 * delayLine.s9;
+    sum.sC += weights.s1 * delayLine.sA;
+    sum.sC += weights.s0 * delayLine.sB;
+    (*invFIRredData)[tab][pol][time + 12][sub_time] = sum.sC;
+
+    sum.sD  = weights.sF * delayLine.sD;
+    delayLine.sD = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 14][sub_time];
+    sum.sD += weights.sE * delayLine.sE;
+    sum.sD += weights.sD * delayLine.sF;
+    sum.sD += weights.sC * delayLine.s0;
+    sum.sD += weights.sB * delayLine.s1;
+    sum.sD += weights.sA * delayLine.s2;
+    sum.sD += weights.s9 * delayLine.s3;
+    sum.sD += weights.s8 * delayLine.s4;
+    sum.sD += weights.s7 * delayLine.s5;
+    sum.sD += weights.s6 * delayLine.s6;
+    sum.sD += weights.s5 * delayLine.s7;
+    sum.sD += weights.s4 * delayLine.s8;
+    sum.sD += weights.s3 * delayLine.s9;
+    sum.sD += weights.s2 * delayLine.sA;
+    sum.sD += weights.s1 * delayLine.sB;
+    sum.sD += weights.s0 * delayLine.sC;
+    (*invFIRredData)[tab][pol][time + 13][sub_time] = sum.sD;
+
+    sum.sE  = weights.sF * delayLine.sE;
+    delayLine.sE = (*invFFTedData)[tab][pol][time + NR_STATION_FILTER_TAPS - 1 + 15][sub_time];
+    sum.sE += weights.sE * delayLine.sF;
+    sum.sE += weights.sD * delayLine.s0;
+    sum.sE += weights.sC * delayLine.s1;
+    sum.sE += weights.sB * delayLine.s2;
+    sum.sE += weights.sA * delayLine.s3;
+    sum.sE += weights.s9 * delayLine.s4;
+    sum.sE += weights.s8 * delayLine.s5;
+    sum.sE += weights.s7 * delayLine.s6;
+    sum.sE += weights.s6 * delayLine.s7;
+    sum.sE += weights.s5 * delayLine.s8;
+    sum.sE += weights.s4 * delayLine.s9;
+    sum.sE += weights.s3 * delayLine.sA;
+    sum.sE += weights.s2 * delayLine.sB;
+    sum.sE += weights.s1 * delayLine.sC;
+    sum.sE += weights.s0 * delayLine.sD;
+    (*invFIRredData)[tab][pol][time + 14][sub_time] = sum.sE;
+
+    // Last step of the block: slot sF is refilled at the top of the next
+    // iteration, so there is no load here.
+    sum.sF  = weights.sF * delayLine.sF;
+    sum.sF += weights.sE * delayLine.s0;
+    sum.sF += weights.sD * delayLine.s1;
+    sum.sF += weights.sC * delayLine.s2;
+    sum.sF += weights.sB * delayLine.s3;
+    sum.sF += weights.sA * delayLine.s4;
+    sum.sF += weights.s9 * delayLine.s5;
+    sum.sF += weights.s8 * delayLine.s6;
+    sum.sF += weights.s7 * delayLine.s7;
+    sum.sF += weights.s6 * delayLine.s8;
+    sum.sF += weights.s5 * delayLine.s9;
+    sum.sF += weights.s4 * delayLine.sA;
+    sum.sF += weights.s3 * delayLine.sB;
+    sum.sF += weights.s2 * delayLine.sC;
+    sum.sF += weights.s1 * delayLine.sD;
+    sum.sF += weights.s0 * delayLine.sE;
+    (*invFIRredData)[tab][pol][time + 15][sub_time] = sum.sF;
+  }
+}
diff --git a/RTCP/GPUProc/src/UHEP/InvFIR.cl-0.ptx b/RTCP/GPUProc/src/UHEP/InvFIR.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..2179b103dc77531446838556584310d90c722f7b --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvFIR.cl-0.ptx @@ -0,0 +1,447 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Tue Feb 7 07:15:59 2012 (1328595359) +// Driver 295.20 +// + +.version 3.0
+.target sm_21, texmode_independent +.address_size 32 + + +.entry invFIRfilter( + .param .u32 .ptr .global .align 1 invFIRfilter_param_0, + .param .u32 .ptr .global .align 1 invFIRfilter_param_1, + .param .u32 .ptr .global .align 1 invFIRfilter_param_2 +) +{ + .reg .f32 %f<386>; + .reg .pred %p<2>; + .reg .s32 %r<71>; + + + ld.param.u32 %r21, [invFIRfilter_param_0]; + ld.param.u32 %r22, [invFIRfilter_param_1]; + ld.param.u32 %r23, [invFIRfilter_param_2]; + // inline asm + mov.u32 %r7, %envreg3; + // inline asm + // inline asm + mov.u32 %r8, %ntid.x; + // inline asm + // inline asm + mov.u32 %r9, %ctaid.x; + // inline asm + mul.lo.s32 %r24, %r9, %r8; + // inline asm + mov.u32 %r10, %tid.x; + // inline asm + add.s32 %r25, %r10, %r7; + mad.lo.s32 %r26, %r9, %r8, %r25; + // inline asm + mov.u32 %r11, %envreg4; + // inline asm + // inline asm + mov.u32 %r12, %ntid.y; + // inline asm + // inline asm + mov.u32 %r13, %ctaid.y; + // inline asm + mul.lo.s32 %r27, %r13, %r12; + // inline asm + mov.u32 %r14, %tid.y; + // inline asm + add.s32 %r28, %r14, %r11; + mad.lo.s32 %r29, %r13, %r12, %r28; + // inline asm + mov.u32 %r15, %envreg5; + // inline asm + // inline asm + mov.u32 %r16, %ntid.z; + // inline asm + // inline asm + mov.u32 %r17, %ctaid.z; + // inline asm + mul.lo.s32 %r30, %r17, %r16; + // inline asm + mov.u32 %r18, %tid.z; + // inline asm + add.s32 %r31, %r18, %r15; + mad.lo.s32 %r32, %r17, %r16, %r31; + shl.b32 %r33, %r26, 6; + add.s32 %r34, %r23, %r33; + mad.lo.s32 %r35, %r32, 8511488, %r22; + mad.lo.s32 %r36, %r29, 4255744, %r35; + shl.b32 %r37, %r26, 2; + add.s32 %r38, %r36, %r37; + ld.global.f32 %f17, [%r38]; + mov.u32 %r70, 0; + ld.global.f32 %f18, [%r38+4096]; + ld.global.f32 %f19, [%r38+8192]; + ld.global.f32 %f20, [%r38+12288]; + ld.global.f32 %f21, [%r38+16384]; + ld.global.f32 %f22, [%r38+20480]; + ld.global.f32 %f23, [%r38+24576]; + ld.global.f32 %f24, [%r38+28672]; + ld.global.f32 %f25, [%r38+32768]; + ld.global.f32 %f26, [%r38+36864]; + ld.global.f32 
%f27, [%r38+40960]; + ld.global.f32 %f28, [%r38+45056]; + ld.global.f32 %f29, [%r38+49152]; + ld.global.f32 %f30, [%r38+53248]; + ld.global.f32 %f31, [%r38+57344]; + ld.global.v4.f32 {%f367, %f368, %f369, %f370}, [%r34+48]; + ld.global.v4.f32 {%f371, %f372, %f373, %f374}, [%r34+32]; + ld.global.v4.f32 {%f375, %f376, %f377, %f378}, [%r34+16]; + ld.global.v4.f32 {%f379, %f380, %f381, %f382}, [%r34]; + shl.b32 %r39, %r30, 23; + shl.b32 %r40, %r15, 23; + add.s32 %r41, %r39, %r40; + shl.b32 %r42, %r18, 23; + add.s32 %r43, %r41, %r42; + shl.b32 %r44, %r27, 22; + add.s32 %r45, %r43, %r44; + shl.b32 %r46, %r11, 22; + add.s32 %r47, %r45, %r46; + shl.b32 %r48, %r14, 22; + add.s32 %r49, %r47, %r48; + shl.b32 %r50, %r24, 2; + add.s32 %r51, %r49, %r50; + shl.b32 %r52, %r7, 2; + add.s32 %r53, %r51, %r52; + shl.b32 %r54, %r10, 2; + add.s32 %r55, %r53, %r54; + add.s32 %r56, %r55, %r21; + add.s32 %r1, %r56, 32768; + mul.lo.s32 %r57, %r15, 8511488; + mad.lo.s32 %r58, %r30, 8511488, %r57; + mad.lo.s32 %r59, %r18, 8511488, %r58; + mad.lo.s32 %r60, %r27, 4255744, %r59; + mad.lo.s32 %r61, %r11, 4255744, %r60; + mad.lo.s32 %r62, %r14, 4255744, %r61; + add.s32 %r63, %r62, %r50; + add.s32 %r64, %r63, %r52; + add.s32 %r65, %r64, %r54; + add.s32 %r66, %r65, %r22; + add.s32 %r2, %r66, 122880; + mov.f32 %f347, %f25; + mov.f32 %f348, %f26; + mov.f32 %f349, %f27; + mov.f32 %f350, %f28; + mov.f32 %f335, %f21; + mov.f32 %f336, %f22; + mov.f32 %f337, %f23; + mov.f32 %f338, %f24; + mov.f32 %f323, %f17; + mov.f32 %f324, %f18; + mov.f32 %f325, %f19; + mov.f32 %f326, %f20; + mov.f32 %f383, %f29; + mov.f32 %f384, %f30; + mov.f32 %f385, %f31; + mov.f32 %f362, %f32; + mov.u32 %r69, 64; + +BB0_1: + mul.ftz.f32 %f35, %f369, %f324; + fma.rn.ftz.f32 %f36, %f370, %f323, %f35; + fma.rn.ftz.f32 %f38, %f368, %f325, %f36; + fma.rn.ftz.f32 %f40, %f367, %f326, %f38; + fma.rn.ftz.f32 %f42, %f374, %f335, %f40; + fma.rn.ftz.f32 %f44, %f373, %f336, %f42; + fma.rn.ftz.f32 %f46, %f372, %f337, %f44; + fma.rn.ftz.f32 %f48, 
%f371, %f338, %f46; + fma.rn.ftz.f32 %f50, %f378, %f347, %f48; + fma.rn.ftz.f32 %f52, %f377, %f348, %f50; + fma.rn.ftz.f32 %f54, %f376, %f349, %f52; + fma.rn.ftz.f32 %f56, %f375, %f350, %f54; + fma.rn.ftz.f32 %f58, %f382, %f383, %f56; + fma.rn.ftz.f32 %f60, %f381, %f384, %f58; + fma.rn.ftz.f32 %f62, %f380, %f385, %f60; + add.s32 %r67, %r2, %r70; + ld.global.f32 %f63, [%r67+-57344]; + fma.rn.ftz.f32 %f64, %f379, %f63, %f62; + add.s32 %r68, %r1, %r70; + st.global.f32 [%r68+-32768], %f64; + mul.ftz.f32 %f65, %f369, %f325; + fma.rn.ftz.f32 %f66, %f370, %f324, %f65; + fma.rn.ftz.f32 %f67, %f368, %f326, %f66; + fma.rn.ftz.f32 %f68, %f367, %f335, %f67; + fma.rn.ftz.f32 %f69, %f374, %f336, %f68; + fma.rn.ftz.f32 %f70, %f373, %f337, %f69; + fma.rn.ftz.f32 %f71, %f372, %f338, %f70; + fma.rn.ftz.f32 %f72, %f371, %f347, %f71; + fma.rn.ftz.f32 %f73, %f378, %f348, %f72; + fma.rn.ftz.f32 %f74, %f377, %f349, %f73; + fma.rn.ftz.f32 %f75, %f376, %f350, %f74; + fma.rn.ftz.f32 %f76, %f375, %f383, %f75; + fma.rn.ftz.f32 %f77, %f382, %f384, %f76; + fma.rn.ftz.f32 %f78, %f381, %f385, %f77; + ld.global.f32 %f79, [%r67+-53248]; + fma.rn.ftz.f32 %f80, %f380, %f79, %f78; + fma.rn.ftz.f32 %f81, %f379, %f323, %f80; + st.global.f32 [%r68+-28672], %f81; + mul.ftz.f32 %f82, %f369, %f326; + fma.rn.ftz.f32 %f83, %f370, %f325, %f82; + fma.rn.ftz.f32 %f84, %f368, %f335, %f83; + fma.rn.ftz.f32 %f85, %f367, %f336, %f84; + fma.rn.ftz.f32 %f86, %f374, %f337, %f85; + fma.rn.ftz.f32 %f87, %f373, %f338, %f86; + fma.rn.ftz.f32 %f88, %f372, %f347, %f87; + fma.rn.ftz.f32 %f89, %f371, %f348, %f88; + fma.rn.ftz.f32 %f90, %f378, %f349, %f89; + fma.rn.ftz.f32 %f91, %f377, %f350, %f90; + fma.rn.ftz.f32 %f92, %f376, %f383, %f91; + fma.rn.ftz.f32 %f93, %f375, %f384, %f92; + fma.rn.ftz.f32 %f94, %f382, %f385, %f93; + ld.global.f32 %f95, [%r67+-49152]; + fma.rn.ftz.f32 %f96, %f381, %f95, %f94; + fma.rn.ftz.f32 %f97, %f380, %f323, %f96; + fma.rn.ftz.f32 %f98, %f379, %f324, %f97; + st.global.f32 [%r68+-24576], %f98; + 
mul.ftz.f32 %f99, %f369, %f335; + fma.rn.ftz.f32 %f100, %f370, %f326, %f99; + fma.rn.ftz.f32 %f101, %f368, %f336, %f100; + fma.rn.ftz.f32 %f102, %f367, %f337, %f101; + fma.rn.ftz.f32 %f103, %f374, %f338, %f102; + fma.rn.ftz.f32 %f104, %f373, %f347, %f103; + fma.rn.ftz.f32 %f105, %f372, %f348, %f104; + fma.rn.ftz.f32 %f106, %f371, %f349, %f105; + fma.rn.ftz.f32 %f107, %f378, %f350, %f106; + fma.rn.ftz.f32 %f108, %f377, %f383, %f107; + fma.rn.ftz.f32 %f109, %f376, %f384, %f108; + fma.rn.ftz.f32 %f110, %f375, %f385, %f109; + ld.global.f32 %f111, [%r67+-45056]; + fma.rn.ftz.f32 %f112, %f382, %f111, %f110; + fma.rn.ftz.f32 %f113, %f381, %f323, %f112; + fma.rn.ftz.f32 %f114, %f380, %f324, %f113; + fma.rn.ftz.f32 %f115, %f379, %f325, %f114; + st.global.f32 [%r68+-20480], %f115; + mul.ftz.f32 %f116, %f369, %f336; + fma.rn.ftz.f32 %f117, %f370, %f335, %f116; + fma.rn.ftz.f32 %f118, %f368, %f337, %f117; + fma.rn.ftz.f32 %f119, %f367, %f338, %f118; + fma.rn.ftz.f32 %f120, %f374, %f347, %f119; + fma.rn.ftz.f32 %f121, %f373, %f348, %f120; + fma.rn.ftz.f32 %f122, %f372, %f349, %f121; + fma.rn.ftz.f32 %f123, %f371, %f350, %f122; + fma.rn.ftz.f32 %f124, %f378, %f383, %f123; + fma.rn.ftz.f32 %f125, %f377, %f384, %f124; + fma.rn.ftz.f32 %f126, %f376, %f385, %f125; + ld.global.f32 %f127, [%r67+-40960]; + fma.rn.ftz.f32 %f128, %f375, %f127, %f126; + fma.rn.ftz.f32 %f129, %f382, %f323, %f128; + fma.rn.ftz.f32 %f130, %f381, %f324, %f129; + fma.rn.ftz.f32 %f131, %f380, %f325, %f130; + fma.rn.ftz.f32 %f132, %f379, %f326, %f131; + st.global.f32 [%r68+-16384], %f132; + mul.ftz.f32 %f133, %f369, %f337; + fma.rn.ftz.f32 %f134, %f370, %f336, %f133; + fma.rn.ftz.f32 %f135, %f368, %f338, %f134; + fma.rn.ftz.f32 %f136, %f367, %f347, %f135; + fma.rn.ftz.f32 %f137, %f374, %f348, %f136; + fma.rn.ftz.f32 %f138, %f373, %f349, %f137; + fma.rn.ftz.f32 %f139, %f372, %f350, %f138; + fma.rn.ftz.f32 %f140, %f371, %f383, %f139; + fma.rn.ftz.f32 %f141, %f378, %f384, %f140; + fma.rn.ftz.f32 %f142, %f377, 
%f385, %f141; + ld.global.f32 %f143, [%r67+-36864]; + fma.rn.ftz.f32 %f144, %f376, %f143, %f142; + fma.rn.ftz.f32 %f145, %f375, %f323, %f144; + fma.rn.ftz.f32 %f146, %f382, %f324, %f145; + fma.rn.ftz.f32 %f147, %f381, %f325, %f146; + fma.rn.ftz.f32 %f148, %f380, %f326, %f147; + fma.rn.ftz.f32 %f149, %f379, %f335, %f148; + st.global.f32 [%r68+-12288], %f149; + mul.ftz.f32 %f150, %f369, %f338; + fma.rn.ftz.f32 %f151, %f370, %f337, %f150; + fma.rn.ftz.f32 %f152, %f368, %f347, %f151; + fma.rn.ftz.f32 %f153, %f367, %f348, %f152; + fma.rn.ftz.f32 %f154, %f374, %f349, %f153; + fma.rn.ftz.f32 %f155, %f373, %f350, %f154; + fma.rn.ftz.f32 %f156, %f372, %f383, %f155; + fma.rn.ftz.f32 %f157, %f371, %f384, %f156; + fma.rn.ftz.f32 %f158, %f378, %f385, %f157; + ld.global.f32 %f159, [%r67+-32768]; + fma.rn.ftz.f32 %f160, %f377, %f159, %f158; + fma.rn.ftz.f32 %f161, %f376, %f323, %f160; + fma.rn.ftz.f32 %f162, %f375, %f324, %f161; + fma.rn.ftz.f32 %f163, %f382, %f325, %f162; + fma.rn.ftz.f32 %f164, %f381, %f326, %f163; + fma.rn.ftz.f32 %f165, %f380, %f335, %f164; + fma.rn.ftz.f32 %f166, %f379, %f336, %f165; + st.global.f32 [%r68+-8192], %f166; + mul.ftz.f32 %f167, %f369, %f347; + fma.rn.ftz.f32 %f168, %f370, %f338, %f167; + fma.rn.ftz.f32 %f169, %f368, %f348, %f168; + fma.rn.ftz.f32 %f170, %f367, %f349, %f169; + fma.rn.ftz.f32 %f171, %f374, %f350, %f170; + fma.rn.ftz.f32 %f172, %f373, %f383, %f171; + fma.rn.ftz.f32 %f173, %f372, %f384, %f172; + fma.rn.ftz.f32 %f174, %f371, %f385, %f173; + ld.global.f32 %f175, [%r67+-28672]; + fma.rn.ftz.f32 %f176, %f378, %f175, %f174; + fma.rn.ftz.f32 %f177, %f377, %f323, %f176; + fma.rn.ftz.f32 %f178, %f376, %f324, %f177; + fma.rn.ftz.f32 %f179, %f375, %f325, %f178; + fma.rn.ftz.f32 %f180, %f382, %f326, %f179; + fma.rn.ftz.f32 %f181, %f381, %f335, %f180; + fma.rn.ftz.f32 %f182, %f380, %f336, %f181; + fma.rn.ftz.f32 %f183, %f379, %f337, %f182; + st.global.f32 [%r68+-4096], %f183; + mul.ftz.f32 %f184, %f369, %f348; + fma.rn.ftz.f32 %f185, %f370, 
%f347, %f184; + fma.rn.ftz.f32 %f186, %f368, %f349, %f185; + fma.rn.ftz.f32 %f187, %f367, %f350, %f186; + fma.rn.ftz.f32 %f188, %f374, %f383, %f187; + fma.rn.ftz.f32 %f189, %f373, %f384, %f188; + fma.rn.ftz.f32 %f190, %f372, %f385, %f189; + ld.global.f32 %f191, [%r67+-24576]; + fma.rn.ftz.f32 %f192, %f371, %f191, %f190; + fma.rn.ftz.f32 %f193, %f378, %f323, %f192; + fma.rn.ftz.f32 %f194, %f377, %f324, %f193; + fma.rn.ftz.f32 %f195, %f376, %f325, %f194; + fma.rn.ftz.f32 %f196, %f375, %f326, %f195; + fma.rn.ftz.f32 %f197, %f382, %f335, %f196; + fma.rn.ftz.f32 %f198, %f381, %f336, %f197; + fma.rn.ftz.f32 %f199, %f380, %f337, %f198; + fma.rn.ftz.f32 %f200, %f379, %f338, %f199; + st.global.f32 [%r68], %f200; + mul.ftz.f32 %f201, %f369, %f349; + fma.rn.ftz.f32 %f202, %f370, %f348, %f201; + fma.rn.ftz.f32 %f203, %f368, %f350, %f202; + fma.rn.ftz.f32 %f204, %f367, %f383, %f203; + fma.rn.ftz.f32 %f205, %f374, %f384, %f204; + fma.rn.ftz.f32 %f206, %f373, %f385, %f205; + ld.global.f32 %f207, [%r67+-20480]; + fma.rn.ftz.f32 %f208, %f372, %f207, %f206; + fma.rn.ftz.f32 %f209, %f371, %f323, %f208; + fma.rn.ftz.f32 %f210, %f378, %f324, %f209; + fma.rn.ftz.f32 %f211, %f377, %f325, %f210; + fma.rn.ftz.f32 %f212, %f376, %f326, %f211; + fma.rn.ftz.f32 %f213, %f375, %f335, %f212; + fma.rn.ftz.f32 %f214, %f382, %f336, %f213; + fma.rn.ftz.f32 %f215, %f381, %f337, %f214; + fma.rn.ftz.f32 %f216, %f380, %f338, %f215; + fma.rn.ftz.f32 %f217, %f379, %f347, %f216; + st.global.f32 [%r68+4096], %f217; + mul.ftz.f32 %f218, %f369, %f350; + fma.rn.ftz.f32 %f219, %f370, %f349, %f218; + fma.rn.ftz.f32 %f220, %f368, %f383, %f219; + fma.rn.ftz.f32 %f221, %f367, %f384, %f220; + fma.rn.ftz.f32 %f222, %f374, %f385, %f221; + ld.global.f32 %f223, [%r67+-16384]; + fma.rn.ftz.f32 %f224, %f373, %f223, %f222; + fma.rn.ftz.f32 %f225, %f372, %f323, %f224; + fma.rn.ftz.f32 %f226, %f371, %f324, %f225; + fma.rn.ftz.f32 %f227, %f378, %f325, %f226; + fma.rn.ftz.f32 %f228, %f377, %f326, %f227; + fma.rn.ftz.f32 %f229, 
%f376, %f335, %f228; + fma.rn.ftz.f32 %f230, %f375, %f336, %f229; + fma.rn.ftz.f32 %f231, %f382, %f337, %f230; + fma.rn.ftz.f32 %f232, %f381, %f338, %f231; + fma.rn.ftz.f32 %f233, %f380, %f347, %f232; + fma.rn.ftz.f32 %f234, %f379, %f348, %f233; + st.global.f32 [%r68+8192], %f234; + mul.ftz.f32 %f235, %f369, %f383; + fma.rn.ftz.f32 %f236, %f370, %f350, %f235; + fma.rn.ftz.f32 %f237, %f368, %f384, %f236; + fma.rn.ftz.f32 %f238, %f367, %f385, %f237; + ld.global.f32 %f239, [%r67+-12288]; + fma.rn.ftz.f32 %f240, %f374, %f239, %f238; + fma.rn.ftz.f32 %f241, %f373, %f323, %f240; + fma.rn.ftz.f32 %f242, %f372, %f324, %f241; + fma.rn.ftz.f32 %f243, %f371, %f325, %f242; + fma.rn.ftz.f32 %f244, %f378, %f326, %f243; + fma.rn.ftz.f32 %f245, %f377, %f335, %f244; + fma.rn.ftz.f32 %f246, %f376, %f336, %f245; + fma.rn.ftz.f32 %f247, %f375, %f337, %f246; + fma.rn.ftz.f32 %f248, %f382, %f338, %f247; + fma.rn.ftz.f32 %f249, %f381, %f347, %f248; + fma.rn.ftz.f32 %f250, %f380, %f348, %f249; + fma.rn.ftz.f32 %f251, %f379, %f349, %f250; + st.global.f32 [%r68+12288], %f251; + mul.ftz.f32 %f252, %f369, %f384; + fma.rn.ftz.f32 %f253, %f370, %f383, %f252; + fma.rn.ftz.f32 %f254, %f368, %f385, %f253; + ld.global.f32 %f255, [%r67+-8192]; + fma.rn.ftz.f32 %f256, %f367, %f255, %f254; + fma.rn.ftz.f32 %f257, %f374, %f323, %f256; + fma.rn.ftz.f32 %f258, %f373, %f324, %f257; + fma.rn.ftz.f32 %f259, %f372, %f325, %f258; + fma.rn.ftz.f32 %f260, %f371, %f326, %f259; + fma.rn.ftz.f32 %f261, %f378, %f335, %f260; + fma.rn.ftz.f32 %f262, %f377, %f336, %f261; + fma.rn.ftz.f32 %f263, %f376, %f337, %f262; + fma.rn.ftz.f32 %f264, %f375, %f338, %f263; + fma.rn.ftz.f32 %f265, %f382, %f347, %f264; + fma.rn.ftz.f32 %f266, %f381, %f348, %f265; + fma.rn.ftz.f32 %f267, %f380, %f349, %f266; + fma.rn.ftz.f32 %f268, %f379, %f350, %f267; + st.global.f32 [%r68+16384], %f268; + mul.ftz.f32 %f269, %f369, %f385; + fma.rn.ftz.f32 %f270, %f370, %f384, %f269; + ld.global.f32 %f271, [%r67+-4096]; + fma.rn.ftz.f32 %f272, %f368, 
%f271, %f270; + fma.rn.ftz.f32 %f273, %f367, %f323, %f272; + fma.rn.ftz.f32 %f274, %f374, %f324, %f273; + fma.rn.ftz.f32 %f275, %f373, %f325, %f274; + fma.rn.ftz.f32 %f276, %f372, %f326, %f275; + fma.rn.ftz.f32 %f277, %f371, %f335, %f276; + fma.rn.ftz.f32 %f278, %f378, %f336, %f277; + fma.rn.ftz.f32 %f279, %f377, %f337, %f278; + fma.rn.ftz.f32 %f280, %f376, %f338, %f279; + fma.rn.ftz.f32 %f281, %f375, %f347, %f280; + fma.rn.ftz.f32 %f282, %f382, %f348, %f281; + fma.rn.ftz.f32 %f283, %f381, %f349, %f282; + fma.rn.ftz.f32 %f284, %f380, %f350, %f283; + fma.rn.ftz.f32 %f285, %f379, %f383, %f284; + st.global.f32 [%r68+20480], %f285; + ld.global.f32 %f286, [%r67]; + mul.ftz.f32 %f287, %f369, %f286; + fma.rn.ftz.f32 %f288, %f370, %f385, %f287; + fma.rn.ftz.f32 %f289, %f368, %f323, %f288; + fma.rn.ftz.f32 %f290, %f367, %f324, %f289; + fma.rn.ftz.f32 %f291, %f374, %f325, %f290; + fma.rn.ftz.f32 %f292, %f373, %f326, %f291; + fma.rn.ftz.f32 %f293, %f372, %f335, %f292; + fma.rn.ftz.f32 %f294, %f371, %f336, %f293; + fma.rn.ftz.f32 %f295, %f378, %f337, %f294; + fma.rn.ftz.f32 %f296, %f377, %f338, %f295; + fma.rn.ftz.f32 %f297, %f376, %f347, %f296; + fma.rn.ftz.f32 %f298, %f375, %f348, %f297; + fma.rn.ftz.f32 %f299, %f382, %f349, %f298; + fma.rn.ftz.f32 %f300, %f381, %f350, %f299; + fma.rn.ftz.f32 %f301, %f380, %f383, %f300; + fma.rn.ftz.f32 %f302, %f379, %f384, %f301; + st.global.f32 [%r68+24576], %f302; + mul.ftz.f32 %f303, %f369, %f323; + fma.rn.ftz.f32 %f304, %f370, %f286, %f303; + fma.rn.ftz.f32 %f305, %f368, %f324, %f304; + fma.rn.ftz.f32 %f306, %f367, %f325, %f305; + fma.rn.ftz.f32 %f307, %f374, %f326, %f306; + fma.rn.ftz.f32 %f308, %f373, %f335, %f307; + fma.rn.ftz.f32 %f309, %f372, %f336, %f308; + fma.rn.ftz.f32 %f310, %f371, %f337, %f309; + fma.rn.ftz.f32 %f311, %f378, %f338, %f310; + fma.rn.ftz.f32 %f312, %f377, %f347, %f311; + fma.rn.ftz.f32 %f313, %f376, %f348, %f312; + fma.rn.ftz.f32 %f314, %f375, %f349, %f313; + fma.rn.ftz.f32 %f315, %f382, %f350, %f314; + 
fma.rn.ftz.f32 %f316, %f381, %f383, %f315; + fma.rn.ftz.f32 %f317, %f380, %f384, %f316; + fma.rn.ftz.f32 %f318, %f379, %f385, %f317; + st.global.f32 [%r68+28672], %f318; + add.s32 %r70, %r70, 65536; + add.s32 %r69, %r69, -1; + setp.ne.s32 %p1, %r69, 0; + mov.f32 %f383, %f383; + mov.f32 %f384, %f384; + mov.f32 %f385, %f385; + mov.f32 %f366, %f286; + @%p1 bra BB0_1; + + ret; +} + + diff --git a/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.cc b/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.cc new file mode 100644 index 0000000000000000000000000000000000000000..8dd949ce2b73470c5de20cdcbc25469f1808bea0 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.cc @@ -0,0 +1,4166 @@ +int reverseSubbandMapping[512] = { + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, + 68, 69, 70, 71, 72, 73, 74, 75, + 76, 77, 78, 79, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, + 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115, + 116, 117, 118, 119, 120, 121, 122, 123, + 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 147, + 148, 149, 150, 151, 152, 153, 154, 155, + 156, 157, 158, 159, 160, 161, 162, 163, + 164, 165, 166, 167, 168, 169, 170, 171, + 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, + 188, 189, 190, 191, 192, 193, 194, 195, + 196, 197, 198, 199, 200, 201, 202, 203, + 204, 205, 206, 207, 208, 209, 210, 211, + 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, + 228, 229, 230, 231, 232, 233, 234, 235, + 236, 237, 238, 239, 240, 241, 242, 243, + 244, 
245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, + 260, 261, 262, 263, 264, 265, 266, 267, + 268, 269, 270, 271, 272, 273, 274, 275, + 276, 277, 278, 279, 280, 281, 282, 283, + 284, 285, 286, 287, 288, 289, 290, 291, + 292, 293, 294, 295, 296, 297, 298, 299, + 300, 301, 302, 303, 304, 305, 306, 307, + 308, 309, 310, 311, 312, 313, 314, 315, + 316, 317, 318, 319, 320, 321, 322, 323, + 324, 325, 326, 327, 328, 329, 330, 331, + 332, 333, 334, 335, 336, 337, 338, 339, + 340, 341, 342, 343, 344, 345, 346, 347, + 348, 349, 350, 351, 352, 353, 354, 355, + 356, 357, 358, 359, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, + 372, 373, 374, 375, 376, 377, 378, 379, + 380, 381, 382, 383, 384, 385, 386, 387, + 388, 389, 390, 391, 392, 393, 394, 395, + 396, 397, 398, 399, 400, 401, 402, 403, + 404, 405, 406, 407, 408, 409, 410, 411, + 412, 413, 414, 415, 416, 417, 418, 419, + 420, 421, 422, 423, 424, 425, 426, 427, + 428, 429, 430, 431, 432, 433, 434, 435, + 436, 437, 438, 439, 440, 441, 442, 443, + 444, 445, 446, 447, 448, 449, 450, 451, + 452, 453, 454, 455, 456, 457, 458, 459, + 460, 461, 462, 463, 464, 465, 466, 467, + 468, 469, 470, 471, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, +}; + + +extern const float invertedStationPPFWeights[1024][16] __attribute__ ((aligned(32))) = { + { -0.000179151700, -0.003308169600, 0.003168136400, -0.001402398400, + -0.000353662500, 0.001927602200, -0.003163333400, 0.004012019600, + 0.305533321300, 0.003717851500, -0.003038205600, 0.001856478200, + -0.000314280200, -0.001413956600, 0.003176999800, -0.003309959200}, + { -0.000188174200, -0.003292642400, 0.003165263500, -0.001388720100, + -0.000384077600, 0.001988112900, -0.003285234700, 0.004303401100, + 0.305532062500, 0.003427783000, -0.002906671400, 0.001795892300, + -0.000283721000, -0.001437413900, 0.003189613400, -0.003326162500}, + { -0.000196877000, -0.003287185700, 0.003153111900, -0.001365477700, + 
-0.000414183100, 0.002048118400, -0.003406922200, 0.004585797400, + 0.305529242100, 0.003148416400, -0.002784621900, 0.001725095700, + -0.000252375300, -0.001452058900, 0.003202823300, -0.003332926300}, + { -0.000205837900, -0.003272003200, 0.003141260600, -0.001352471400, + -0.000443644700, 0.002116848800, -0.003537488300, 0.004878336100, + 0.305525209100, 0.002859642300, -0.002662171400, 0.001663994200, + -0.000211731900, -0.001466550800, 0.003206380900, -0.003349458000}, + { -0.000214552200, -0.003266806700, 0.003129659400, -0.001339690600, + -0.000482746700, 0.002176562700, -0.003658974500, 0.005162307100, + 0.305528831700, 0.002572382100, -0.002530584900, 0.001602837600, + -0.000180271600, -0.001491017200, 0.003220032300, -0.003356689400}, + { -0.000223661700, -0.003260822600, 0.003127233900, -0.001326570100, + -0.000512158000, 0.002235803700, -0.003780111100, 0.005456280000, + 0.305521935800, 0.002275511400, -0.002407952000, 0.001531547700, + -0.000148571000, -0.001505753800, 0.003233233200, -0.003373394500}, + { -0.000231910300, -0.003246569900, 0.003116275100, -0.001304684500, + -0.000541061900, 0.002304368000, -0.003901387700, 0.005741703600, + 0.305522735200, 0.001989240500, -0.002285243100, 0.001469413800, + -0.000116373400, -0.001521319900, 0.003247258700, -0.003380795000}, + { -0.000240501100, -0.003241453500, 0.003104789400, -0.001292085400, + -0.000569919800, 0.002362991000, -0.004022334800, 0.006027997600, + 0.305521780500, 0.001704243000, -0.002153295800, 0.001397822300, + -0.000074381800, -0.001537100900, 0.003251864100, -0.003388377600}, + { -0.000248796500, -0.003226864100, 0.003093528500, -0.001279946600, + -0.000607985400, 0.002421344900, -0.004142487600, 0.006323528600, + 0.305510887600, 0.001418764600, -0.002029953100, 0.001335330400, + -0.000042083300, -0.001561789400, 0.003265175300, -0.003404731400}, + { -0.000257636700, -0.003221504300, 0.003091948000, -0.001267872500, + -0.000636159800, 0.002489119800, -0.004263249000, 0.006610834100, + 
0.305507531700, 0.001124928200, -0.001906951100, 0.001263167800, + -0.000009313800, -0.001577770500, 0.003279564200, -0.003412490900}, + { -0.000265507200, -0.003207541700, 0.003081283900, -0.001246591800, + -0.000664350300, 0.002546976200, -0.004383683600, 0.006898956800, + 0.305502674600, 0.000841583200, -0.001774239400, 0.001200242700, + 0.000033135500, -0.001593584100, 0.003293434500, -0.003429250800}, + { -0.000273917600, -0.003202910800, 0.003070451600, -0.001234554400, + -0.000692451500, 0.002604895500, -0.004503987800, 0.007187871700, + 0.305505931400, 0.000558741700, -0.001650952000, 0.001127390700, + 0.000066556300, -0.001619778200, 0.003298773100, -0.003437037600}, + { -0.000281862900, -0.003188993500, 0.003060168000, -0.001223831900, + -0.000729004800, 0.002671831100, -0.004624290900, 0.007477414500, + 0.305498083700, 0.000267125500, -0.001517720800, 0.001063982400, + 0.000099881700, -0.001636003800, 0.003313035200, -0.003454137100}, + { -0.000289997500, -0.003184579100, 0.003050026400, -0.001202902100, + -0.000756520300, 0.002728774500, -0.004744017700, 0.007767318900, + 0.305488765700, -0.000023525100, -0.001394377600, 0.000990953800, + 0.000143244500, -0.001652648300, 0.003327682700, -0.003461774600}, + { -0.000297868500, -0.003170531700, 0.003048908400, -0.001191632000, + -0.000784087900, 0.002786310700, -0.004864404400, 0.008048797600, + 0.305487970600, -0.000304469600, -0.001260861000, 0.000926754800, + 0.000177135000, -0.001679160000, 0.003343036000, -0.003469765900}, + { -0.000306075200, -0.003165955700, 0.003038511100, -0.001180375200, + -0.000811096700, 0.002842935600, -0.004983911700, 0.008340196300, + 0.305476029500, -0.000594272800, -0.001136564500, 0.000852586500, + 0.000211810600, -0.001696380900, 0.003348178400, -0.003486840300}, + { -0.000313813200, -0.003152349500, 0.003028499400, -0.001169925400, + -0.000837361100, 0.002908698000, -0.005103588900, 0.008632676200, + 0.305471690900, -0.000882964600, -0.001002806300, 0.000788301700, + 
0.000255669300, -0.001713789300, 0.003363569600, -0.003495167000}, + { -0.000321551300, -0.003148759600, 0.003019394800, -0.001150412500, + -0.000873315500, 0.002965177700, -0.005223260400, 0.008916237200, + 0.305466205000, -0.001170623500, -0.000869571200, 0.000714234800, + 0.000290089600, -0.001740435500, 0.003378711000, -0.003512573100}, + { -0.000329970400, -0.003144017200, 0.003018842400, -0.001139831400, + -0.000899901500, 0.003021134300, -0.005342285700, 0.009209557400, + 0.305459744400, -0.001458744900, -0.000744677800, 0.000649268300, + 0.000334721100, -0.001758288700, 0.003394346200, -0.003520921500}, + { -0.000337260000, -0.003130746200, 0.003009011500, -0.001129494000, + -0.000925937200, 0.003077019500, -0.005461903400, 0.009494961400, + 0.305451127000, -0.001745336100, -0.000610465300, 0.000573925900, + 0.000370535700, -0.001776549100, 0.003400302700, -0.003538373300}, + { -0.000345274700, -0.003126683700, 0.002999325200, -0.001119130300, + -0.000951776200, 0.003132334500, -0.005580569300, 0.009789706100, + 0.305441562500, -0.002031776000, -0.000485477300, 0.000498512700, + 0.000406472900, -0.001794658100, 0.003416172000, -0.003546870600}, + { -0.000352248700, -0.003113898000, 0.002990283900, -0.001099777400, + -0.000977716300, 0.003187647900, -0.005699440100, 0.010075840400, + 0.305430485400, -0.002316996000, -0.000351097200, 0.000433003700, + 0.000451135800, -0.001822254600, 0.003431922800, -0.003564774200}, + { -0.000360263700, -0.003109808600, 0.002990478200, -0.001090243800, + -0.001002860000, 0.003252789700, -0.005809448800, 0.010362986600, + 0.305427509700, -0.002601772700, -0.000216601600, 0.000357013800, + 0.000487681400, -0.001841238600, 0.003448505600, -0.003573744000}, + { -0.000367260100, -0.003096989400, 0.002981370000, -0.001081004600, + -0.001037636700, 0.003307938500, -0.005928189500, 0.010650492300, + 0.305413750100, -0.002886387300, -0.000090648300, 0.000290078700, + 0.000524745900, -0.001860858600, 0.003455849400, -0.003582656500}, 
+ { -0.000375146000, -0.003093318200, 0.002972423700, -0.001071720100, + -0.001062345400, 0.003362096800, -0.006046137200, 0.010948594500, + 0.305407159000, -0.003178921900, 0.000043993500, 0.000214496100, + 0.000570197700, -0.001888981300, 0.003472094500, -0.003601269100}, + { -0.000381874300, -0.003080805300, 0.002963790800, -0.001053001800, + -0.001087365200, 0.003416433800, -0.006164483000, 0.011237344100, + 0.305390383500, -0.003461481100, 0.000179544400, 0.000146785300, + 0.000607413700, -0.001908316200, 0.003489057800, -0.003610639600}, + { -0.000389669200, -0.003076842200, 0.002964194700, -0.001043813600, + -0.001112218000, 0.003470974700, -0.006273264800, 0.011526826700, + 0.305382025700, -0.003743576500, 0.000314498600, 0.000070163400, + 0.000654411200, -0.001927947000, 0.003505495000, -0.003629223000}, + { -0.000396332400, -0.003064410600, 0.002955492900, -0.001034693000, + -0.001136761300, 0.003525266500, -0.006392102100, 0.011808326800, + 0.305371538600, -0.004034754500, 0.000440667500, -0.000007068200, + 0.000692151600, -0.001958085100, 0.003514164000, -0.003638938800}, + { -0.000403882200, -0.003061151700, 0.002947019900, -0.001026023300, + -0.001160855500, 0.003578931400, -0.006510233800, 0.012099735200, + 0.305359759100, -0.004325205000, 0.000577290600, -0.000075620500, + 0.000729983100, -0.001977775600, 0.003531138400, -0.003658356000}, + { -0.000410416700, -0.003048768800, 0.002938122000, -0.001017024400, + -0.001185043200, 0.003632680800, -0.006618462800, 0.012391231100, + 0.305346710600, -0.004604878800, 0.000712646300, -0.000153154100, + 0.000778065700, -0.001998535200, 0.003548917800, -0.003668399300}, + { -0.000418160500, -0.003045285600, 0.002940099300, -0.000999989700, + -0.001207958100, 0.003694939000, -0.006736106600, 0.012683638900, + 0.305331925800, -0.004893293600, 0.000848238100, -0.000231056700, + 0.000815922500, -0.002028355600, 0.003567216600, -0.003678810800}, + { -0.000424402600, -0.003033543100, 0.002932209100, -0.000992319500, 
+ -0.001241358200, 0.003749292200, -0.006845375400, 0.012968326800, + 0.305325129100, -0.005181636100, 0.000984943000, -0.000299850300, + 0.000864607400, -0.002050276300, 0.003575779100, -0.003698495600}, + { -0.000431795400, -0.003030515100, 0.002924143600, -0.000984066100, + -0.001264499500, 0.003801673700, -0.006962423800, 0.013261923500, + 0.305307792800, -0.005469551400, 0.001112105700, -0.000378641900, + 0.000903979800, -0.002071214100, 0.003594054000, -0.003708956400}, + { -0.000438154800, -0.003018644300, 0.002915881100, -0.000975892700, + -0.001287607700, 0.003854346300, -0.007080269600, 0.013547492500, + 0.305299153300, -0.005747166800, 0.001248307000, -0.000457538900, + 0.000942775500, -0.002101520100, 0.003612350900, -0.003729050600}, + { -0.000445068000, -0.003016231600, 0.002908760100, -0.000958623000, + -0.001311089600, 0.003906976500, -0.007187486600, 0.013842019100, + 0.305278786000, -0.006032900700, 0.001385321000, -0.000527217900, + 0.000991980200, -0.002123501700, 0.003631277700, -0.003740453600}, + { -0.000451769900, -0.003003554800, 0.002910117000, -0.000951286800, + -0.001333568200, 0.003959036100, -0.007305163600, 0.014129397200, + 0.305266353700, -0.006318495400, 0.001522406400, -0.000607494800, + 0.001032856600, -0.002146282400, 0.003641224700, -0.003751272400}, + { -0.000458704700, -0.003001127800, 0.002902744400, -0.000943766600, + -0.001356286200, 0.004011606300, -0.007413160700, 0.014416934300, + 0.305252141100, -0.006612456400, 0.001659411900, -0.000686657100, + 0.001082013300, -0.002177942800, 0.003660354300, -0.003772222700}, + { -0.000464860200, -0.002989337500, 0.002894831400, -0.000936224300, + -0.001378788100, 0.004063782200, -0.007520350800, 0.014714266400, + 0.305236860000, -0.006896906600, 0.001797689700, -0.000758115600, + 0.001122795500, -0.002200335400, 0.003679910900, -0.003784248300}, + { -0.000471906600, -0.002986971300, 0.002887514500, -0.000929037200, + -0.001400660100, 0.004114765900, -0.007637074700, 0.015003290600, 
+ 0.305220409100, -0.007180134300, 0.001934852000, -0.000838292300, + 0.001173609200, -0.002223798900, 0.003689540000, -0.003804721500}, + { -0.000478005400, -0.002974973900, 0.002890024200, -0.000912804200, + -0.001423103300, 0.004166752200, -0.007744430700, 0.015292286200, + 0.305202709200, -0.007462776200, 0.002072061100, -0.000919168800, + 0.001214479000, -0.002256106400, 0.003709864100, -0.003816509300}, + { -0.000485004200, -0.002972661400, 0.002883075100, -0.000906121000, + -0.001444400500, 0.004217388300, -0.007861128300, 0.015582827100, + 0.305182829200, -0.007754203600, 0.002210354400, -0.001000782800, + 0.001256255000, -0.002278849600, 0.003729366300, -0.003838542900}, + { -0.000491739700, -0.002970642300, 0.002875940200, -0.000898797900, + -0.001467267200, 0.004259593600, -0.007967928800, 0.015873669900, + 0.305171969300, -0.008036267100, 0.002349092100, -0.001072739500, + 0.001307766700, -0.002302777900, 0.003749888800, -0.003850712800}, + { -0.000497291200, -0.002959470700, 0.002868512700, -0.000891753800, + -0.001488975800, 0.004311149200, -0.008075516000, 0.016155288200, + 0.305150388000, -0.008316860100, 0.002487086200, -0.001154788900, + 0.001350046400, -0.002336945300, 0.003761723500, -0.003862439200}, + { -0.000504731900, -0.002956450800, 0.002871262500, -0.000886061100, + -0.001509538700, 0.004360886000, -0.008191697200, 0.016448378400, + 0.305135746300, -0.008606633700, 0.002625496200, -0.001236297400, + 0.001401956000, -0.002360832800, 0.003781771800, -0.003884434400}, + { -0.000510187200, -0.002945799600, 0.002865288500, -0.000870352000, + -0.001531072000, 0.004411842900, -0.008298811000, 0.016741989300, + 0.305119193600, -0.008895401800, 0.002764325800, -0.001319345500, + 0.001445432500, -0.002385302500, 0.003803145000, -0.003898001300}, + { -0.000516831700, -0.002943863500, 0.002858545300, -0.000863995200, + -0.001552015500, 0.004462029400, -0.008404468200, 0.017034066400, + 0.305093771700, -0.009174192300, 0.002903186600, -0.001391517800, 
+ 0.001496396500, -0.002418783000, 0.003823612700, -0.003919897700}, + { -0.000522254800, -0.002933011700, 0.002851744600, -0.000857749800, + -0.001572829300, 0.004512760300, -0.008512038500, 0.017319625600, + 0.305074913300, -0.009461842200, 0.003042687600, -0.001475450400, + 0.001541005000, -0.002444849800, 0.003836006300, -0.003932401200}, + { -0.000529601300, -0.002930099600, 0.002854820400, -0.000852704600, + -0.001592429500, 0.004561274000, -0.008627194900, 0.017615006900, + 0.305054496900, -0.009748599800, 0.003181615900, -0.001558137300, + 0.001594324200, -0.002470181000, 0.003857769000, -0.003945651400}, + { -0.000534931600, -0.002919339300, 0.002848150100, -0.000846731600, + -0.001612810100, 0.004611510300, -0.008734196000, 0.017901410200, + 0.305033298300, -0.010034991200, 0.003321126400, -0.001641796500, + 0.001637657900, -0.002504378200, 0.003879253900, -0.003968435100}, + { -0.000541059600, -0.002918541200, 0.002843043800, -0.000831250300, + -0.001634950600, 0.004652450700, -0.008840558000, 0.018188790800, + 0.305021093200, -0.010312015500, 0.003461182700, -0.001716342200, + 0.001692059700, -0.002531656000, 0.003892381100, -0.003981307700}, + { -0.000546470300, -0.002907704700, 0.002836444100, -0.000825608000, + -0.001654657700, 0.004701589700, -0.008946113900, 0.018485752400, + 0.304996474100, -0.010596860500, 0.003601134500, -0.001801009400, + 0.001736945200, -0.002556816600, 0.003913332400, -0.004004119100}, + { -0.000553421000, -0.002905324000, 0.002840026900, -0.000820954500, + -0.001674129700, 0.004751033800, -0.009052649800, 0.018774565100, + 0.304980747200, -0.010881885600, 0.003741264500, -0.001885730400, + 0.001781462400, -0.002592340800, 0.003936717300, -0.004018305800}, + { -0.000558583500, -0.002894695900, 0.002833775600, -0.000815677100, + -0.001693616200, 0.004800033700, -0.009158923400, 0.019063780300, + 0.304952581400, -0.011174339100, 0.003881652600, -0.001970197900, + 0.001836677600, -0.002619434800, 0.003959785900, 
-0.004032488300}, + { -0.000564955500, -0.002893561300, 0.002828266400, -0.000810671500, + -0.001712856100, 0.004849033400, -0.009265048200, 0.019354083600, + 0.304933847400, -0.011457786400, 0.004022181900, -0.002055659400, + 0.001882018300, -0.002656075600, 0.003973130000, -0.004055549600}, + { -0.000569990700, -0.002883172300, 0.002821862700, -0.000805210900, + -0.001733255400, 0.004888539700, -0.009370571700, 0.019645015200, + 0.304913195700, -0.011739650400, 0.004173020900, -0.002132234200, + 0.001937624600, -0.002683625100, 0.003996648800, -0.004070086100}, + { -0.000576816700, -0.002880712200, 0.002825556300, -0.000801110200, + -0.001751710900, 0.004936537800, -0.009475750900, 0.019935070200, + 0.304882818800, -0.012021363400, 0.004313763100, -0.002218448800, + 0.001984116400, -0.002710135300, 0.004018650800, -0.004093587400}, + { -0.000581575700, -0.002871029700, 0.002820817600, -0.000787003500, + -0.001771208200, 0.004984922000, -0.009581223300, 0.020227247700, + 0.304859324200, -0.012301966200, 0.004453794800, -0.002303557800, + 0.002039760600, -0.002748740500, 0.004034016500, -0.004107894100}, + { -0.000587752000, -0.002870343700, 0.002816043200, -0.000782738000, + -0.001789823800, 0.005033655500, -0.009688643500, 0.020512328500, + 0.304843748500, -0.012592772900, 0.004596104100, -0.002391051400, + 0.002087605700, -0.002776914700, 0.004058501200, -0.004123192100}, + { -0.000592610900, -0.002860138800, 0.002809924700, -0.000777559700, + -0.001809457700, 0.005071968700, -0.009792977800, 0.020805308800, + 0.304818061900, -0.012872591200, 0.004737013500, -0.002477149500, + 0.002144535400, -0.002805006000, 0.004081504200, -0.004147500100}, + { -0.000598855700, -0.002859183800, 0.002804989100, -0.000773711900, + -0.001827130700, 0.005119292900, -0.009897966800, 0.021099553600, + 0.304789228200, -0.013159535100, 0.004888119300, -0.002565148700, + 0.002191690700, -0.002842615100, 0.004106712900, -0.004163246800}, + { -0.000604129100, -0.002847973400, 
0.002808540700, -0.000770115000, + -0.001845151700, 0.005166831300, -0.010003738700, 0.021384603000, + 0.304761759800, -0.013438827400, 0.005030699900, -0.002642744000, + 0.002248900100, -0.002872462400, 0.004120956700, -0.004186959300}, + { -0.000610304100, -0.002847187000, 0.002803718400, -0.000765887200, + -0.001863954900, 0.005204656900, -0.010108070800, 0.021680873600, + 0.304740017400, -0.013726605300, 0.005173599700, -0.002731549500, + 0.002298070200, -0.002901375100, 0.004145912600, -0.004202808100}, + { -0.000614775300, -0.002837812200, 0.002799287100, -0.000751378700, + -0.001873340800, 0.005251545400, -0.010213985000, 0.021968758500, + 0.304717226400, -0.014013384600, 0.005315723700, -0.002818772700, + 0.002355245100, -0.002940735300, 0.004172433300, -0.004219310800}, + { -0.000620567900, -0.002837320300, 0.002794490100, -0.000747532900, + -0.001891056000, 0.005299546400, -0.010309931700, 0.022254934400, + 0.304685251500, -0.014288738100, 0.005467214000, -0.002908733500, + 0.002405494900, -0.002971287100, 0.004187192800, -0.004243542100}, + { -0.000625856800, -0.002826051900, 0.002798242800, -0.000744164300, + -0.001909172200, 0.005336339900, -0.010413292700, 0.022552838800, + 0.304659243200, -0.014574287100, 0.005609909000, -0.002997005100, + 0.002464760300, -0.003001761400, 0.004213047800, -0.004260008200}, + { -0.000631862500, -0.002825655200, 0.002794055000, -0.000740960500, + -0.001926054500, 0.005382887800, -0.010518612100, 0.022842239000, + 0.304632697600, -0.014859275700, 0.005753037200, -0.003086221600, + 0.002513598800, -0.003041041300, 0.004239878600, -0.004276818000}, + { -0.000636395600, -0.002816103900, 0.002789011800, -0.000737686400, + -0.001942767200, 0.005429023200, -0.010623938900, 0.023132882500, + 0.304603952900, -0.015142040300, 0.005905217400, -0.003175720900, + 0.002573489500, -0.003072014700, 0.004265540900, -0.004304236600}, + { -0.000642092900, -0.002815797600, 0.002784613800, -0.000733852000, + -0.001961254600, 0.005467201200, 
-0.010718772800, 0.023422456600, + 0.304574941900, -0.015425894300, 0.006049293000, -0.003266869300, + 0.002625535200, -0.003104379700, 0.004282770700, -0.004320158000}, + { -0.000647228100, -0.002805014900, 0.002789105500, -0.000731700400, + -0.001977598200, 0.005513183700, -0.010823907600, 0.023714956500, + 0.304553789700, -0.015710304400, 0.006194068900, -0.003346361400, + 0.002683321200, -0.003144467600, 0.004309182300, -0.004346993300}, + { -0.000653083600, -0.002804663400, 0.002784856700, -0.000728568200, + -0.001994892300, 0.005549280000, -0.010927513200, 0.024006627800, + 0.304521224900, -0.015991075000, 0.006347319100, -0.003438518800, + 0.002735447600, -0.003175763900, 0.004336078200, -0.004364270700}, + { -0.000657344200, -0.002795073400, 0.002779372400, -0.000723818600, + -0.002002390300, 0.005594924600, -0.011022085400, 0.024298095100, + 0.304487711300, -0.016272419800, 0.006491186300, -0.003529018000, + 0.002797300300, -0.003209814400, 0.004354269100, -0.004380801100}, + { -0.000662778500, -0.002795722300, 0.002777270600, -0.000711766400, + -0.002020750200, 0.005631997300, -0.011127576100, 0.024583535100, + 0.304461685600, -0.016563520800, 0.006636493100, -0.003620768300, + 0.002848323700, -0.003250508600, 0.004381556100, -0.004408351500}, + { -0.000667608000, -0.002785075900, 0.002781813700, -0.000709968500, + -0.002036501400, 0.005678048700, -0.011222628000, 0.024877740200, + 0.304434316000, -0.016843034600, 0.006789868500, -0.003712463600, + 0.002910777700, -0.003284178700, 0.004410227500, -0.004427971200}, + { -0.000673385900, -0.002784864800, 0.002777897100, -0.000707171500, + -0.002053187900, 0.005713078000, -0.011324954000, 0.025171340700, + 0.304396703900, -0.017122233300, 0.006934475400, -0.003805137400, + 0.002964408100, -0.003317850200, 0.004428620600, -0.004444717000}, + { -0.000677353300, -0.002775967700, 0.002773610300, -0.000704737500, + -0.002069178100, 0.005759636500, -0.011421394400, 0.025458149700, + 0.304365892000, -0.017409498600, 
0.007088896400, -0.003897094200, + 0.003025836900, -0.003360743400, 0.004457105300, -0.004473209500}, + { -0.000683225000, -0.002775689300, 0.002769352400, -0.000700635700, + -0.002076469800, 0.005793241800, -0.011523237400, 0.025753901000, + 0.304334966200, -0.017688142300, 0.007233613500, -0.003988893700, + 0.003088733700, -0.003394540900, 0.004485759300, -0.004491807700}, + { -0.000687773900, -0.002765299300, 0.002774214900, -0.000699143000, + -0.002091778900, 0.005839096700, -0.011618952200, 0.026040879100, + 0.304303942000, -0.017967050900, 0.007379100700, -0.004082281200, + 0.003142614000, -0.003427404600, 0.004512855100, -0.004519537900}, + { -0.000693472400, -0.002765610600, 0.002771073600, -0.000697223600, + -0.002107933500, 0.005874142700, -0.011723107400, 0.026331338900, + 0.304277893000, -0.018252857000, 0.007534408900, -0.004175833600, + 0.003206136100, -0.003473370900, 0.004534730300, -0.004538643100}, + { -0.000697463600, -0.002756676800, 0.002766964300, -0.000695192900, + -0.002122896500, 0.005919133800, -0.011817129000, 0.026629304300, + 0.304240989200, -0.018538518600, 0.007680828300, -0.004270140400, + 0.003260984500, -0.003507314200, 0.004563753700, -0.004557731900}, + { -0.000703804500, -0.002754968200, 0.002772071700, -0.000692658900, + -0.002129046300, 0.005951839900, -0.011919323000, 0.026918136600, + 0.304205403100, -0.018813366000, 0.007835360000, -0.004364128800, + 0.003325426100, -0.003542025400, 0.004592039500, -0.004586402700}, + { -0.000707548200, -0.002746360600, 0.002768039100, -0.000690246800, + -0.002145830400, 0.005988351400, -0.012013907400, 0.027209072700, + 0.304176499000, -0.019099860600, 0.007984496700, -0.004450296100, + 0.003379587100, -0.003587618200, 0.004614320900, -0.004606892100}, + { -0.000713093800, -0.002746934500, 0.002765358600, -0.000689188800, + -0.002160148400, 0.006033055000, -0.012109150300, 0.027501099300, + 0.304145203000, -0.019382840700, 0.008140437800, -0.004545293600, + 0.003445318600, -0.003624168800, 
0.004645205500, -0.004627325000}, + { -0.000716707400, -0.002738242600, 0.002761221600, -0.000686846600, + -0.002176335900, 0.006068547500, -0.012202678300, 0.027791528800, + 0.304104283300, -0.019666250900, 0.008287779500, -0.004641076300, + 0.003501561200, -0.003658751600, 0.004673640000, -0.004656297500}, + { -0.000722876700, -0.002737400900, 0.002768917300, -0.000676615300, + -0.002181080700, 0.006110074200, -0.012305860200, 0.028085420100, + 0.304069875300, -0.019947475400, 0.008443532900, -0.004736314300, + 0.003566616300, -0.003706438500, 0.004696952300, -0.004676431900}, + { -0.000726338700, -0.002729093500, 0.002765145400, -0.000674502100, + -0.002197280000, 0.006146021300, -0.012401276400, 0.028369440400, + 0.304036328200, -0.020230886000, 0.008591834800, -0.004833255700, + 0.003624215600, -0.003742729000, 0.004728140600, -0.004696972400}, + { -0.000731741100, -0.002729709100, 0.002762428000, -0.000673225600, + -0.002212588700, 0.006180633900, -0.012494169800, 0.028662906200, + 0.304000013700, -0.020511012600, 0.008748066500, -0.004928785800, + 0.003689348800, -0.003788813700, 0.004759315900, -0.004727789800}, + { -0.000735386100, -0.002721352900, 0.002758959300, -0.000672109200, + -0.002226292500, 0.006224450200, -0.012588365800, 0.028957720600, + 0.303962072700, -0.020792099600, 0.008896179300, -0.005026132300, + 0.003747768000, -0.003827110500, 0.004781502000, -0.004747416400}, + { -0.000741467200, -0.002720193100, 0.002764997300, -0.000670457700, + -0.002231939800, 0.006257946300, -0.012682582100, 0.029244511900, + 0.303933574300, -0.021072898700, 0.009054063500, -0.005123937600, + 0.003816203000, -0.003865951000, 0.004814396600, -0.004769350200}, + { -0.000745150400, -0.002711729700, 0.002761387900, -0.000669147800, + -0.002246404800, 0.006290380500, -0.012784037700, 0.029540755700, + 0.303893044500, -0.021352478200, 0.009201932100, -0.005220680700, + 0.003872318000, -0.003911010800, 0.004845423900, -0.004800157600}, + { -0.000748453500, 
-0.002703558700, 0.002757747500, -0.000667691900, + -0.002261575500, 0.006324791000, -0.012877926700, 0.029827673100, + 0.303851724000, -0.021629843300, 0.009359170900, -0.005318693300, + 0.003941798300, -0.003952016200, 0.004869373400, -0.004820883700}, + { -0.000753869400, -0.002704376900, 0.002755157700, -0.000665839200, + -0.002265346200, 0.006366843000, -0.012972784600, 0.030116858700, + 0.303818672700, -0.021907914800, 0.009517056100, -0.005416881700, + 0.004010497500, -0.003990905800, 0.004901022400, -0.004852436300}, + { -0.000758084800, -0.002694258700, 0.002760975500, -0.000666054000, + -0.002279366500, 0.006400355600, -0.013065059600, 0.030415689700, + 0.303781885400, -0.022195988700, 0.009667243000, -0.005515687900, + 0.004068687500, -0.004038259800, 0.004935181400, -0.004875341600}, + { -0.000763249100, -0.002695207900, 0.002758724400, -0.000665508700, + -0.002293624700, 0.006433708500, -0.013157852400, 0.030703785700, + 0.303736794400, -0.022471040100, 0.009824735600, -0.005614528900, + 0.004139136400, -0.004080198200, 0.004959857600, -0.004896598000}, + { -0.000766591100, -0.002686928900, 0.002754921900, -0.000662665200, + -0.002299129500, 0.006466426100, -0.013251271900, 0.030994692400, + 0.303698508100, -0.022758266200, 0.009976047300, -0.005715080300, + 0.004199827500, -0.004118619000, 0.004991603200, -0.004928421800}, + { -0.000771897700, -0.002688143900, 0.002753279200, -0.000663161200, + -0.002311137900, 0.006508681700, -0.013345456800, 0.031285977700, + 0.303659766600, -0.023032853100, 0.010133649800, -0.005813434400, + 0.004268203300, -0.004168660700, 0.005027973800, -0.004954054600}, + { -0.000775874300, -0.002678215700, 0.002759243400, -0.000663934100, + -0.002324484400, 0.006541662700, -0.013438640200, 0.031578389200, + 0.303617696900, -0.023316437000, 0.010294044300, -0.005916048600, + 0.004331218000, -0.004210712400, 0.005053314100, -0.004976115300}, + { -0.000781019600, -0.002679339200, 0.002756838700, -0.000661553700, + 
-0.002330048900, 0.006575368200, -0.013522135600, 0.031870307600, + 0.303587200300, -0.023593760200, 0.010444472000, -0.006015436800, + 0.004402065400, -0.004251580600, 0.005086442900, -0.005008967600}, + { -0.000784186200, -0.002671460600, 0.002753849700, -0.000661193300, + -0.002343698500, 0.006608303400, -0.013614934700, 0.032163622800, + 0.303542329100, -0.023875417200, 0.010604498100, -0.006117430000, + 0.004462875900, -0.004301162100, 0.005122582000, -0.005033354700}, + { -0.000790060900, -0.002670435300, 0.002760669100, -0.000660699900, + -0.002347647400, 0.006639524200, -0.013708092900, 0.032447376800, + 0.303498774900, -0.024159306200, 0.010756094800, -0.006218141400, + 0.004535552600, -0.004345436200, 0.005149409700, -0.005056179700}, + { -0.000793256200, -0.002662733600, 0.002757935000, -0.000660639200, + -0.002360988600, 0.006672169900, -0.013801224500, 0.032743987500, + 0.303460194300, -0.024440923300, 0.010917290800, -0.006321772900, + 0.004598983100, -0.004386048500, 0.005182927100, -0.005089678500}, + { -0.000798421400, -0.002663989900, 0.002756297000, -0.000661054500, + -0.002373851700, 0.006703786000, -0.013892349300, 0.033037596300, + 0.303413510200, -0.024710885300, 0.011075401800, -0.006421881700, + 0.004669293700, -0.004437547500, 0.005220209500, -0.005114783000}, + { -0.000801279400, -0.002656330700, 0.002752971400, -0.000658573800, + -0.002379104500, 0.006737428400, -0.013977140000, 0.033324125700, + 0.303374972100, -0.024994140700, 0.011228843600, -0.006526058700, + 0.004734510300, -0.004481928500, 0.005247478500, -0.005138134800}, + { -0.000807307300, -0.002655561700, 0.002760752200, -0.000660917000, + -0.002390857100, 0.006768397200, -0.014068603800, 0.033621534300, + 0.303332931800, -0.025273538400, 0.011389660100, -0.006628580100, + 0.004808058900, -0.004525112200, 0.005283024100, -0.005174599300}, + { -0.000810410400, -0.002647879500, 0.002757788100, -0.000659822800, + -0.002392746100, 0.006808149800, -0.014163218900, 0.033911264900, + 
0.303289891500, -0.025552150000, 0.011550770700, -0.006731633300, + 0.004882070200, -0.004569322900, 0.005320551900, -0.005200360700}, + { -0.000815493900, -0.002649463500, 0.002756849600, -0.000660759200, + -0.002405245800, 0.006840045300, -0.014256420100, 0.034200612800, + 0.303246711500, -0.025832407000, 0.011703557800, -0.006835218200, + 0.004945699400, -0.004623184600, 0.005349893100, -0.005224926800}, + { -0.000818255300, -0.002641882000, 0.002753575300, -0.000658712000, + -0.002409717000, 0.006872439200, -0.014340074900, 0.034489243800, + 0.303202188800, -0.026110236100, 0.011865377600, -0.006939263300, + 0.005020650000, -0.004667616900, 0.005385875600, -0.005260165300}, + { -0.000824295400, -0.002641015800, 0.002761659300, -0.000661736600, + -0.002420492700, 0.006902452300, -0.014430720500, 0.034789552300, + 0.303152032300, -0.026395378600, 0.012028731400, -0.007046166400, + 0.005088017800, -0.004713545300, 0.005414457900, -0.005284879700}, + { -0.000827077700, -0.002633626200, 0.002758707300, -0.000659803100, + -0.002424890300, 0.006934810400, -0.014514347800, 0.035080728500, + 0.303114481500, -0.026675115000, 0.012182095400, -0.007148973700, + 0.005161314600, -0.004768172600, 0.005454664900, -0.005312372600}, + { -0.000832149000, -0.002635253900, 0.002757890500, -0.000661304500, + -0.002436456100, 0.006965471700, -0.014606652200, 0.035372382500, + 0.303065098100, -0.026950110700, 0.012344296500, -0.007255591100, + 0.005228310500, -0.004812448700, 0.005492759000, -0.005338761500}, + { -0.000835595400, -0.002625576200, 0.002763550700, -0.000660398000, + -0.002441798600, 0.006987700700, -0.014687960800, 0.035664170800, + 0.303024793500, -0.027226932800, 0.012507686500, -0.007362011400, + 0.005306264800, -0.004861076400, 0.005521878400, -0.005375829600}, + { -0.000840529300, -0.002627331400, 0.002763058500, -0.000662228600, + -0.002453253700, 0.007018769500, -0.014782113000, 0.035948446100, + 0.302972042600, -0.027509888900, 0.012671612300, -0.007469387600, + 
0.005372340600, -0.004915494200, 0.005562631500, -0.005403790100}, + { -0.000843220100, -0.002620023800, 0.002760230300, -0.000660605900, + -0.002456982100, 0.007050007700, -0.014864692500, 0.036241695300, + 0.302928958400, -0.027786579600, 0.012825587300, -0.007574066800, + 0.005449141300, -0.004962376800, 0.005602376800, -0.005431440200}, + { -0.000848374500, -0.002621758400, 0.002759921300, -0.000662808200, + -0.002467783200, 0.007079958900, -0.014956855300, 0.036538564300, + 0.302880652200, -0.028068772200, 0.012990451100, -0.007683264800, + 0.005518700600, -0.005009907900, 0.005630853800, -0.005466715000}, + { -0.000851700900, -0.002612329100, 0.002766079600, -0.000662984200, + -0.002470535200, 0.007110968900, -0.015040742100, 0.036823681900, + 0.302836038100, -0.028342627700, 0.013153651000, -0.007789506200, + 0.005594863000, -0.005067111000, 0.005673345500, -0.005495962500}, + { -0.000856978400, -0.002613369900, 0.002763436900, -0.000673963700, + -0.002480246400, 0.007141770500, -0.015122696500, 0.037119392900, + 0.302785848400, -0.028623546600, 0.013318487200, -0.007897426600, + 0.005674706600, -0.005118090200, 0.005705570200, -0.005523266400}, + { -0.000859635800, -0.002606209100, 0.002760875300, -0.000672918600, + -0.002482718300, 0.007170544900, -0.015215522800, 0.037407381800, + 0.302738295200, -0.028897800000, 0.013473662100, -0.008005370600, + 0.005743489200, -0.005163726600, 0.005743450200, -0.005560237100}, + { -0.000863002200, -0.002596834100, 0.002767923100, -0.000675974400, + -0.002493386100, 0.007201598200, -0.015297293000, 0.037704094600, + 0.302685277400, -0.029176984600, 0.013637747600, -0.008112433600, + 0.005820627000, -0.005221758100, 0.005786648100, -0.005590368200}, + { -0.000867828800, -0.002598670200, 0.002766900000, -0.000675186800, + -0.002497981300, 0.007223193600, -0.015379756300, 0.037993797900, + 0.302642069200, -0.029458517200, 0.013805185000, -0.008225058800, + 0.005893724000, -0.005272906200, 0.005819685000, -0.005618333200}, 
+ { -0.000870433100, -0.002591465700, 0.002764455200, -0.000674653700, + -0.002499759200, 0.007250951700, -0.015471874400, 0.038284268000, + 0.302587106700, -0.029736435400, 0.013970133000, -0.008333790400, + 0.005973581100, -0.005321845400, 0.005859684300, -0.005657037700}, + { -0.000875301600, -0.002593729700, 0.002764759200, -0.000677160100, + -0.002510659300, 0.007282648600, -0.015556232000, 0.038575785600, + 0.302541430200, -0.030018298000, 0.014128120100, -0.008444322900, + 0.006045352000, -0.005370854200, 0.005902462900, -0.005689051600}, + { -0.000878577700, -0.002584426900, 0.002771222400, -0.000678044400, + -0.002512230800, 0.007312074800, -0.015638780000, 0.038866667200, + 0.302496054000, -0.030288050400, 0.014292583100, -0.008553225900, + 0.006124864000, -0.005432964900, 0.005938417900, -0.005718682200}, + { -0.000883371200, -0.002586549000, 0.002771330800, -0.000680774600, + -0.002522537800, 0.007342861000, -0.015721824200, 0.039157112000, + 0.302437909100, -0.030564097400, 0.014458560600, -0.008665183400, + 0.006197183700, -0.005481519600, 0.005978394700, -0.005757587300}, + { -0.000885670100, -0.002579613300, 0.002768808300, -0.000679473400, + -0.002527024400, 0.007363441100, -0.015802899000, 0.039449376300, + 0.302387399600, -0.030841085500, 0.014625338400, -0.008776679900, + 0.006280742400, -0.005535923100, 0.006013604000, -0.005787145500}, + { -0.000891447000, -0.002579042300, 0.002777178000, -0.000681709200, + -0.002527331400, 0.007391840300, -0.015884752000, 0.039742056100, + 0.302336098600, -0.031117719900, 0.014792369000, -0.008889397700, + 0.006352081900, -0.005595185800, 0.006058727200, -0.005818964100}, + { -0.000893764400, -0.002572520300, 0.002775953500, -0.000683794500, + -0.002538011700, 0.007422582300, -0.015967839100, 0.040036207000, + 0.302282936600, -0.031394934700, 0.014949546000, -0.008998513600, + 0.006433987600, -0.005646959300, 0.006102867800, -0.005850311400}, + { -0.000899005400, -0.002573622600, 0.002772999400, 
-0.000692720800, + -0.002539675700, 0.007441443200, -0.016048205900, 0.040332527300, + 0.302235443400, -0.031680564700, 0.015119563200, -0.009112249200, + 0.006519185100, -0.005702277700, 0.006137124600, -0.005890136500}, + { -0.000902066000, -0.002564399000, 0.002779793900, -0.000694252800, + -0.002540354300, 0.007470292100, -0.016132038300, 0.040618118300, + 0.302181358200, -0.031955488100, 0.015287200700, -0.009226118300, + 0.006591765300, -0.005762778500, 0.006183472900, -0.005922781400}, + { -0.000906938700, -0.002566684400, 0.002780297700, -0.000697575600, + -0.002549585000, 0.007499502500, -0.016213682700, 0.040913759200, + 0.302123085100, -0.032226770800, 0.015450858700, -0.009346907400, + 0.006678209600, -0.005819558300, 0.006220559900, -0.005953802600}, + { -0.000909073800, -0.002560158500, 0.002778219800, -0.000696753300, + -0.002553495700, 0.007519854200, -0.016296539100, 0.041202514700, + 0.302075447000, -0.032502060800, 0.015620155600, -0.009463128500, + 0.006754676200, -0.005871819500, 0.006264360200, -0.005997784800}, + { -0.000914005100, -0.002562081100, 0.002778180000, -0.000697783700, + -0.002553821800, 0.007547123000, -0.016377089100, 0.041498813500, + 0.302013593200, -0.032784352400, 0.015779698800, -0.009574791300, + 0.006838885700, -0.005925655500, 0.006310258300, -0.006030707600}, + { -0.000917048000, -0.002553292100, 0.002786094000, -0.000702405400, + -0.002562617900, 0.007576990700, -0.016461435000, 0.041789551900, + 0.301962237800, -0.033057262400, 0.015948108300, -0.009690267000, + 0.006913942100, -0.005990311300, 0.006349328800, -0.006062933900}, + { -0.000921807000, -0.002555562000, 0.002785897700, -0.000702731900, + -0.002565562900, 0.007595979900, -0.016542771000, 0.042079113900, + 0.301910604800, -0.033329631800, 0.016116562300, -0.009804671800, + 0.007000329800, -0.006045818500, 0.006396695800, -0.006096934600}, + { -0.000924334400, -0.002548160800, 0.002781648600, -0.000711981100, + -0.002564257200, 0.007623115300, 
-0.016625851600, 0.042371265900, + 0.301854227800, -0.033610088700, 0.016287909800, -0.009922951400, + 0.007079130900, -0.006101996500, 0.006432255400, -0.006137812800}, + { -0.000927029000, -0.002539353500, 0.002789348200, -0.000715860200, + -0.002576064600, 0.007646012100, -0.016699057600, 0.042661393000, + 0.301797671500, -0.033890060300, 0.016458330300, -0.010038799500, + 0.007167086900, -0.006158957400, 0.006480627500, -0.006172693900}, + { -0.000931902700, -0.002541677100, 0.002789556600, -0.000717300000, + -0.002575750000, 0.007673144100, -0.016781259600, 0.042953309700, + 0.301741305700, -0.034159645200, 0.016627163700, -0.010155742500, + 0.007243536900, -0.006224969100, 0.006520927200, -0.006205898400}, + { -0.000933911800, -0.002535183000, 0.002787827800, -0.000716968300, + -0.002578831000, 0.007691723300, -0.016861993700, 0.043245651300, + 0.301682214200, -0.034440183200, 0.016788863600, -0.010270201400, + 0.007330632700, -0.006281032900, 0.006567502700, -0.006252289300}, + { -0.000939577700, -0.002534998100, 0.002796975700, -0.000720236600, + -0.002578087200, 0.007720653400, -0.016935463200, 0.043538995400, + 0.301633352100, -0.034711193400, 0.016959520500, -0.010389329800, + 0.007410503600, -0.006337047100, 0.006615790600, -0.006287412000}, + { -0.000941518100, -0.002528823500, 0.002796070900, -0.000722919300, + -0.002589426200, 0.007740582000, -0.017016393700, 0.043833061900, + 0.301569791200, -0.034986501900, 0.017129209400, -0.010505114500, + 0.007497394200, -0.006406316800, 0.006658344700, -0.006322346100}, + { -0.000946421000, -0.002531073000, 0.002796544700, -0.000724851500, + -0.002588324800, 0.007766585900, -0.017097850700, 0.044127692000, + 0.301505220000, -0.035261474700, 0.017300143900, -0.010624650100, + 0.007577627400, -0.006462535400, 0.006707002900, -0.006357778700}, + { -0.000949359500, -0.002521316700, 0.002800927700, -0.000735266600, + -0.002588925400, 0.007786312500, -0.017171090300, 0.044413545900, + 0.301451854500, -0.035540053600, 
0.017473398600, -0.010743983400, + 0.007669196200, -0.006524338300, 0.006746574400, -0.006401639200}, + { -0.000954362300, -0.002523680400, 0.002801635500, -0.000737454900, + -0.002587518900, 0.007812074200, -0.017252787500, 0.044711874800, + 0.301393202700, -0.035815202400, 0.017645387000, -0.010864711900, + 0.007750585800, -0.006581589800, 0.006796203400, -0.006438009700}, + { -0.000956035500, -0.002517691400, 0.002800277200, -0.000737232300, + -0.002591035300, 0.007833242200, -0.017326384300, 0.044998869000, + 0.301337182700, -0.036093943200, 0.017809064300, -0.010981524900, + 0.007839125400, -0.006652924700, 0.006840684600, -0.006474304300}, + { -0.000961768900, -0.002517572500, 0.002810197300, -0.000743330300, + -0.002599865500, 0.007850982300, -0.017405665400, 0.045296336600, + 0.301279455500, -0.036359405400, 0.017979361500, -0.011101909300, + 0.007921089700, -0.006710841700, 0.006891483500, -0.006513718100}, + { -0.000963551800, -0.002511606900, 0.002809097600, -0.000744270100, + -0.002600023100, 0.007880162200, -0.017480905500, 0.045585787000, + 0.301219274700, -0.036633262700, 0.018151488300, -0.011221145500, + 0.008012401600, -0.006770837100, 0.006940658100, -0.006560412300}, + { -0.000968652300, -0.002513137500, 0.002806943500, -0.000754654000, + -0.002599054600, 0.007895994400, -0.017561716500, 0.045876647100, + 0.301155082000, -0.036915500100, 0.018327200500, -0.011345476800, + 0.008097770300, -0.006833460100, 0.006983825400, -0.006596465200}, + { -0.000970271500, -0.002507180900, 0.002805563000, -0.000754783900, + -0.002601855100, 0.007916113200, -0.017634207100, 0.046165570800, + 0.301092874000, -0.037187988000, 0.018499388000, -0.011464768800, + 0.008187619300, -0.006903834600, 0.007038598500, -0.006636191300}, + { -0.000975884100, -0.002506921900, 0.002815167300, -0.000759066900, + -0.002599558300, 0.007943302900, -0.017707594600, 0.046455528400, + 0.301029284000, -0.037460064500, 0.018672889900, -0.011588469400, + 0.008272917200, -0.006966950500, 
0.007082025300, -0.006672404900}, + { -0.000977775800, -0.002500954000, 0.002814043200, -0.000759764200, + -0.002601193300, 0.007960205400, -0.017789132800, 0.046750583700, + 0.300971836600, -0.037732212300, 0.018846008900, -0.011709400800, + 0.008366076000, -0.007028371700, 0.007132703000, -0.006720561000}, + { -0.000982640700, -0.002503047600, 0.002812783500, -0.000772729700, + -0.002609428000, 0.007980042300, -0.017860728900, 0.047040567000, + 0.300906624700, -0.038005599900, 0.019011206000, -0.011831617600, + 0.008451307000, -0.007091540900, 0.007176317300, -0.006756891400}, + { -0.000985371100, -0.002494154700, 0.002820679000, -0.000776398500, + -0.002607418500, 0.008007388400, -0.017934700400, 0.047335766700, + 0.300844278100, -0.038285177900, 0.019186232700, -0.011953311100, + 0.008543197100, -0.007164042200, 0.007233149500, -0.006798540800}, + { -0.000987006200, -0.002488366700, 0.002819585000, -0.000777006200, + -0.002609399200, 0.008026572200, -0.018006655200, 0.047630120500, + 0.300783801700, -0.038555831700, 0.019361129300, -0.012079179200, + 0.008630964100, -0.007229475600, 0.007279247200, -0.006839293900}, + { -0.000991790800, -0.002490903500, 0.002820404900, -0.000778976000, + -0.002609670800, 0.008041528500, -0.018085444700, 0.047923413800, + 0.300712721600, -0.038822537300, 0.019533132300, -0.012200346800, + 0.008724921800, -0.007291803100, 0.007330615400, -0.006888043700}, + { -0.000994223400, -0.002482150700, 0.002828146400, -0.000781936000, + -0.002610430800, 0.008060409500, -0.018159587600, 0.048210544400, + 0.300648793200, -0.039101469100, 0.019710850400, -0.012328366300, + 0.008814417700, -0.007358544200, 0.007377316500, -0.006926866100}, + { -0.000999231800, -0.002484009500, 0.002826100200, -0.000792419400, + -0.002609247300, 0.008077029200, -0.018229228000, 0.048504934800, + 0.300585104500, -0.039369675200, 0.019885022200, -0.012451522700, + 0.008910574100, -0.007423567800, 0.007433072000, -0.006968240600}, + { -0.001000945500, 
-0.002478474500, 0.002825789700, -0.000794439900, + -0.002607855100, 0.008105184800, -0.018306252700, 0.048797495500, + 0.300525821900, -0.039647806700, 0.020062891600, -0.012579180400, + 0.008997547100, -0.007499733800, 0.007482806700, -0.007009147900}, + { -0.001006580300, -0.002478145100, 0.002835512200, -0.000798497500, + -0.002607487200, 0.008121814200, -0.018375427900, 0.049092597200, + 0.300460428800, -0.039917450300, 0.020228151600, -0.012700466400, + 0.009092708900, -0.007563628700, 0.007535511600, -0.007059152000}, + { -0.001008355200, -0.002471553400, 0.002831584800, -0.000808105100, + -0.002606745200, 0.008139326600, -0.018448182600, 0.049381202700, + 0.300391149900, -0.040192877300, 0.020406004000, -0.012829151000, + 0.009182604800, -0.007628580400, 0.007591957300, -0.007101149600}, + { -0.001013982200, -0.002471464900, 0.002841559200, -0.000812553500, + -0.002606187400, 0.008156499100, -0.018520619000, 0.049672014200, + 0.300332528600, -0.040460854200, 0.020581902400, -0.012954925100, + 0.009282076700, -0.007699103200, 0.007641451600, -0.007142203300}, + { -0.001015410900, -0.002465827800, 0.002840812400, -0.000813752200, + -0.002607328400, 0.008174805900, -0.018593391800, 0.049961784800, + 0.300260370700, -0.040734108300, 0.020758957600, -0.013083002200, + 0.009369503200, -0.007773543100, 0.007701593500, -0.007189132600}, + { -0.001020138600, -0.002468794900, 0.002842431500, -0.000816356100, + -0.002607611600, 0.008192356500, -0.018666130300, 0.050254997100, + 0.300195390400, -0.041008289300, 0.020936736300, -0.013210161600, + 0.009469910200, -0.007844275500, 0.007748911800, -0.007239595400}, + { -0.001022805800, -0.002458999000, 0.002846963800, -0.000828329300, + -0.002605038900, 0.008207999600, -0.018736947600, 0.050544717400, + 0.300121126000, -0.041280619200, 0.021114682600, -0.013339966800, + 0.009561255200, -0.007910621800, 0.007806632000, -0.007282763500}, + { -0.001027526900, -0.002462055200, 0.002848515400, -0.000830876200, + 
-0.002605154700, 0.008224995500, -0.018808660100, 0.050837682400, + 0.300056546100, -0.041544493000, 0.021290236500, -0.013466505100, + 0.009661947700, -0.007982526900, 0.007857386900, -0.007324914900}, + { -0.001029015400, -0.002456559500, 0.002848095500, -0.000832295700, + -0.002605985800, 0.008242689200, -0.018881182900, 0.051132245200, + 0.299987943200, -0.041819414400, 0.021460165700, -0.013595099000, + 0.009753117800, -0.008049069000, 0.007915606500, -0.007368637400}, + { -0.001034829400, -0.002455432800, 0.002854963900, -0.000845516700, + -0.002603155800, 0.008260465800, -0.018942531200, 0.051423553400, + 0.299919862100, -0.042091828500, 0.021638799100, -0.013722881700, + 0.009852191300, -0.008130314500, 0.007966615900, -0.007421596100}, + { -0.001036184000, -0.002450114700, 0.002854590200, -0.000847135700, + -0.002603834000, 0.008278482500, -0.019017700500, 0.051710800600, + 0.299849767900, -0.042362922600, 0.021817938400, -0.013854742200, + 0.009945759200, -0.008198836000, 0.008026496300, -0.007466532300}, + { -0.001037730800, -0.002444743300, 0.002854305800, -0.000848986600, + -0.002604044700, 0.008295552500, -0.019090033000, 0.052010247800, + 0.299784559000, -0.042634176100, 0.021997806500, -0.013987692600, + 0.010041094600, -0.008271562400, 0.008079345200, -0.007513280300}, + { -0.001043455200, -0.002444344500, 0.002864501800, -0.000854285500, + -0.002602041500, 0.008310446500, -0.019159837900, 0.052305586800, + 0.299710502400, -0.042902559300, 0.022175330800, -0.014116237900, + 0.010143265600, -0.008342511600, 0.008140629300, -0.007559512700}, + { -0.001044875200, -0.002438248600, 0.002860802800, -0.000864181800, + -0.002600981100, 0.008329727400, -0.019224621200, 0.052591247500, + 0.299637947900, -0.043172229900, 0.022355720200, -0.014249993700, + 0.010239585900, -0.008416132400, 0.008193382200, -0.007603296800}, + { -0.001049660900, -0.002441463500, 0.002862933200, -0.000867444000, + -0.002600374000, 0.008346423900, -0.019298671600, 0.052882824900, + 
0.299571471100, -0.043441724600, 0.022534686200, -0.014379049200, + 0.010339539800, -0.008496004600, 0.008255099600, -0.007660450500}, + { -0.001051839000, -0.002432661800, 0.002871064200, -0.000871418100, + -0.002599735200, 0.008364846400, -0.019359975400, 0.053178429700, + 0.299491463500, -0.043720040800, 0.022707641400, -0.014511589100, + 0.010435528000, -0.008569545100, 0.008307898100, -0.007704528800}, + { -0.001056944400, -0.002434904100, 0.002869839900, -0.000883216200, + -0.002596486400, 0.008379552700, -0.019432499100, 0.053470206400, + 0.299422575400, -0.043988296500, 0.022887494700, -0.014642563600, + 0.010540183700, -0.008642947600, 0.008371567300, -0.007752716200}, + { -0.001059278600, -0.002426308700, 0.002878112600, -0.000887696700, + -0.002594827000, 0.008395133400, -0.019505511500, 0.053763206500, + 0.299351847700, -0.044255915700, 0.023068158700, -0.014777689900, + 0.010638181300, -0.008718424600, 0.008427026200, -0.007801564500}, + { -0.001063908000, -0.002429340400, 0.002879382200, -0.000887586200, + -0.002586331800, 0.008411993400, -0.019568733800, 0.054053355000, + 0.299282071600, -0.044524238300, 0.023251997700, -0.014901532300, + 0.010741407000, -0.008790859100, 0.008487210700, -0.007858185900}, + { -0.001065244000, -0.002423263600, 0.002875630100, -0.000897083200, + -0.002587760600, 0.008421815400, -0.019630506300, 0.054344370000, + 0.299207159000, -0.044799707800, 0.023436300800, -0.015039485700, + 0.010841905100, -0.008868051200, 0.008543139300, -0.007904789100}, + { -0.001071045600, -0.002422944100, 0.002886222600, -0.000903092500, + -0.002584785900, 0.008436057700, -0.019701857900, 0.054637916200, + 0.299132742800, -0.045064497900, 0.023616380100, -0.015174084700, + 0.010937108800, -0.008952392000, 0.008600802500, -0.007952339000}, + { -0.001072495600, -0.002416975800, 0.002882721100, -0.000913301400, + -0.002582942100, 0.008453838100, -0.019764945900, 0.054929607400, + 0.299058734900, -0.045332341000, 0.023788158500, -0.015304227600, + 
0.011042483200, -0.009027145700, 0.008665715200, -0.008001667100}, + { -0.001077376400, -0.002420012800, 0.002885109500, -0.000917271300, + -0.002580907300, 0.008468413700, -0.019836708300, 0.055225670900, + 0.298977325900, -0.045602981800, 0.023970548200, -0.015441580200, + 0.011142744200, -0.009104537200, 0.008722251100, -0.008048876900}, + { -0.001079321400, -0.002411773500, 0.002893394800, -0.000920943200, + -0.002583378800, 0.008478936300, -0.019901475900, 0.055512440700, + 0.298911585300, -0.045870162900, 0.024153171400, -0.015576602500, + 0.011251768000, -0.009181661000, 0.008787159000, -0.008112103700}, + { -0.001084311600, -0.002413690800, 0.002891307200, -0.000929480500, + -0.002571357000, 0.008492748700, -0.019961847400, 0.055805182500, + 0.298830136900, -0.046141044500, 0.024337335900, -0.015715846500, + 0.011353693700, -0.009260656400, 0.008844778800, -0.008160197200}, + { -0.001085440200, -0.002408972300, 0.002891576300, -0.000931811500, + -0.002571374100, 0.008512218400, -0.020029308800, 0.056094866900, + 0.298759763600, -0.046405144200, 0.024519846800, -0.015854163900, + 0.011454721200, -0.009336453400, 0.008911303700, -0.008211115400}, + { -0.001087840400, -0.002400214400, 0.002900157200, -0.000936969600, + -0.002568721700, 0.008526039100, -0.020100315900, 0.056395501800, + 0.298682066000, -0.046677919500, 0.024693755800, -0.015986171300, + 0.011560009200, -0.009425383800, 0.008972850800, -0.008261904500}, + { -0.001092599200, -0.002402600100, 0.002898882600, -0.000948207200, + -0.002568479900, 0.008534249200, -0.020162388700, 0.056680564700, + 0.298599300000, -0.046947568800, 0.024878475500, -0.016126306100, + 0.011662794000, -0.009502392800, 0.009040315200, -0.008313510700}, + { -0.001094674000, -0.002394239600, 0.002907384900, -0.000953100600, + -0.002566762900, 0.008551918400, -0.020227896300, 0.056970953100, + 0.298525257700, -0.047209041900, 0.025060099400, -0.016261962900, + 0.011773596800, -0.009584221800, 0.009097353100, -0.008372136900}, 
+ { -0.001099661700, -0.002396213700, 0.002905172800, -0.000960917900, + -0.002557753000, 0.008556551400, -0.020285138500, 0.057267599400, + 0.298446320200, -0.047478375000, 0.025245702300, -0.016403414400, + 0.011877441200, -0.009662092200, 0.009165535000, -0.008424799700}, + { -0.001100755900, -0.002391414300, 0.002905602300, -0.000963826400, + -0.002557026600, 0.008574970900, -0.020351243300, 0.057560244000, + 0.298365825000, -0.047747110700, 0.025434302500, -0.016536709400, + 0.011980096900, -0.009743311000, 0.009226377800, -0.008478483800}, + { -0.001106899300, -0.002390253900, 0.002913287000, -0.000978876700, + -0.002551352000, 0.008590004000, -0.020415253900, 0.057854601300, + 0.298294423300, -0.048021033600, 0.025612282700, -0.016673561700, + 0.012092889000, -0.009827928200, 0.009288917500, -0.008530542400}, + { -0.001107825500, -0.002385153400, 0.002912420300, -0.000977036900, + -0.002545865100, 0.008597199400, -0.020476810000, 0.058145551600, + 0.298212478700, -0.048288066300, 0.025797833300, -0.016816027500, + 0.012198085900, -0.009907251900, 0.009358594600, -0.008584316600}, + { -0.001113605200, -0.002384780000, 0.002923485400, -0.000983932400, + -0.002542224400, 0.008612798400, -0.020540206700, 0.058438067100, + 0.298128451900, -0.048553232900, 0.025981636800, -0.016954362300, + 0.012312002500, -0.009992841500, 0.009421764400, -0.008636872000}, + { -0.001114931100, -0.002379174400, 0.002919927800, -0.000993979200, + -0.002542420300, 0.008620998100, -0.020602200500, 0.058732787100, + 0.298055673800, -0.048812541600, 0.026165585800, -0.017095016200, + 0.012413140100, -0.010079579700, 0.009490588100, -0.008700892600}, + { -0.001119665800, -0.002382694100, 0.002922998700, -0.000998529900, + -0.002540198000, 0.008638257800, -0.020669669800, 0.059018701600, + 0.297970747600, -0.049076921100, 0.026350960600, -0.017238147400, + 0.012520217600, -0.010164038100, 0.009553595700, -0.008753278600}, + { -0.001121930300, -0.002372946000, 0.002926808400, 
-0.001007501500, + -0.002530568000, 0.008642272200, -0.020728754500, 0.059312998000, + 0.297893741900, -0.049347375400, 0.026529402400, -0.017376589500, + 0.012634266200, -0.010246944100, 0.009626161500, -0.008809499600}, + { -0.001126794900, -0.002376553600, 0.002930143500, -0.001012468200, + -0.002528098100, 0.008659451000, -0.020796619900, 0.059603377300, + 0.297814833400, -0.049613614700, 0.026720176000, -0.017513336800, + 0.012740654600, -0.010331978800, 0.009690722000, -0.008866407500}, + { -0.001128974000, -0.002366817000, 0.002934650000, -0.001025446300, + -0.002525885700, 0.008665084700, -0.020855847900, 0.059897161500, + 0.297721129900, -0.049883464500, 0.026908098600, -0.017658755900, + 0.012849997300, -0.010418098600, 0.009755024700, -0.008920174300}, + { -0.001129829700, -0.002362132000, 0.002934410200, -0.001024611700, + -0.002517293800, 0.008683823800, -0.020914048900, 0.060185287000, + 0.297640606500, -0.050147624100, 0.027094554000, -0.017800540400, + 0.012966752800, -0.010503877000, 0.009829858100, -0.008978175500}, + { -0.001134965700, -0.002364533600, 0.002933528900, -0.001036539700, + -0.002515641700, 0.008689737800, -0.020973444000, 0.060482529200, + 0.297557115100, -0.050413365200, 0.027273058500, -0.017943142100, + 0.013074472100, -0.010588375400, 0.009890164700, -0.009040073800}, + { -0.001136940000, -0.002355925500, 0.002941289900, -0.001037794300, + -0.002507997500, 0.008695284800, -0.021036808600, 0.060775711600, + 0.297482132700, -0.050678910500, 0.027462057700, -0.018090535100, + 0.013184994200, -0.010672807300, 0.009964812300, -0.009098345900}, + { -0.001142165600, -0.002358437400, 0.002940699400, -0.001050877900, + -0.002502402100, 0.008709501200, -0.021101691400, 0.061067211800, + 0.297396279600, -0.050939241800, 0.027647756700, -0.018232338700, + 0.013303147900, -0.010762989400, 0.010032437200, -0.009154686400}, + { -0.001143817000, -0.002350196200, 0.002949420200, -0.001055769400, + -0.002503786900, 0.008721014300, 
-0.021156852500, 0.061355453400, + 0.297312718800, -0.051205897900, 0.027831555500, -0.018369198800, + 0.013411298100, -0.010850127500, 0.010098901500, -0.009213325600}, + { -0.001149066000, -0.002352278400, 0.002947918700, -0.001065202900, + -0.002489611700, 0.008732236600, -0.021220051600, 0.061647632000, + 0.297221172300, -0.051473353600, 0.028021068000, -0.018517173300, + 0.013522491900, -0.010935538100, 0.010174446400, -0.009272360200}, + { -0.001149887200, -0.002347967100, 0.002948573300, -0.001067597300, + -0.002492352100, 0.008744611200, -0.021276152500, 0.061941420000, + 0.297140614200, -0.051734216700, 0.028208189500, -0.018660250400, + 0.013639306600, -0.011036161100, 0.010246624400, -0.009332105500}, + { -0.001156099600, -0.002346166500, 0.002954987600, -0.001078834400, + -0.002480260300, 0.008746430300, -0.021336172300, 0.062236110400, + 0.297056151300, -0.052003778300, 0.028400005500, -0.018810443800, + 0.013752383900, -0.011122203700, 0.010319669900, -0.009400556800}, + { -0.001156792500, -0.002341776700, 0.002955577100, -0.001081090900, + -0.002482874600, 0.008758231200, -0.021390879000, 0.062526684700, + 0.296965665500, -0.052264429900, 0.028578727400, -0.018955461900, + 0.013863735500, -0.011211408300, 0.010387174600, -0.009457035400}, + { -0.001163170100, -0.002339996800, 0.002962141700, -0.001093302800, + -0.002466860000, 0.008767593500, -0.021452275300, 0.062822494300, + 0.296881395600, -0.052523860000, 0.028770277400, -0.019092348700, + 0.013982426200, -0.011303346500, 0.010456770600, -0.009515365500}, + { -0.001163895700, -0.002335754800, 0.002963079500, -0.001096137300, + -0.002469118700, 0.008780017100, -0.021511634200, 0.063110931400, + 0.296794244500, -0.052791038800, 0.028962045800, -0.019243677500, + 0.014097865300, -0.011393027300, 0.010536858700, -0.009581441000}, + { -0.001170125300, -0.002333956200, 0.002969450300, -0.001107234800, + -0.002456863700, 0.008780917600, -0.021569686100, 0.063405648500, + 0.296709109200, -0.053052763700, 
0.029143138800, -0.019391070400, + 0.014211122400, -0.011483842100, 0.010606181300, -0.009639674100}, + { -0.001170874500, -0.002329660800, 0.002970330900, -0.001110081400, + -0.002459004700, 0.008793030100, -0.021628522100, 0.063695013000, + 0.296618745800, -0.053317589600, 0.029334344500, -0.019542481200, + 0.014326849900, -0.011573598100, 0.010685674300, -0.009702145300}, + { -0.001171914500, -0.002323961500, 0.002966009600, -0.001116909400, + -0.002450355900, 0.008799692100, -0.021679886200, 0.063990447900, + 0.296526171200, -0.053581877800, 0.029529044900, -0.019682702400, + 0.014449006700, -0.011668677900, 0.010757748000, -0.009762586000}, + { -0.001178072500, -0.002323403700, 0.002977740500, -0.001125189300, + -0.002444614700, 0.008813206900, -0.021746613300, 0.064282383100, + 0.296436562600, -0.053839700800, 0.029708485400, -0.019829369800, + 0.014562038800, -0.011759300000, 0.010824029000, -0.009829464000}, + { -0.001179124900, -0.002317979100, 0.002973757000, -0.001132168000, + -0.002435747100, 0.008820504400, -0.021802440100, 0.064574678500, + 0.296352017200, -0.054105959400, 0.029902690900, -0.019983871100, + 0.014680546000, -0.011851836600, 0.010906814900, -0.009898260300}, + { -0.001185031900, -0.002317376800, 0.002984984200, -0.001139351300, + -0.002434510900, 0.008828516100, -0.021853498100, 0.064870816200, + 0.296258886800, -0.054357841800, 0.030089125800, -0.020129945500, + 0.014804636400, -0.011948361700, 0.010980172100, -0.009959891000}, + { -0.001186088400, -0.002311933800, 0.002981060500, -0.001146354300, + -0.002425750100, 0.008835761000, -0.021908764800, 0.065163770900, + 0.296172290700, -0.054626925700, 0.030279417600, -0.020274570600, + 0.014920355400, -0.012042691300, 0.011052809300, -0.010020900500}, + { -0.001192214700, -0.002310334400, 0.002988338000, -0.001161750800, + -0.002421544800, 0.008842662600, -0.021962597300, 0.065452832100, + 0.296075847400, -0.054887446700, 0.030471019600, -0.020427698700, + 0.015038441400, -0.012134860100, 
0.011134601100, -0.010085579200}, + { -0.001193029800, -0.002305832600, 0.002988510300, -0.001160745800, + -0.002415287100, 0.008851064800, -0.022018328800, 0.065747514100, + 0.295985303100, -0.055152663300, 0.030655950200, -0.020580030600, + 0.015157088900, -0.012231165800, 0.011208895700, -0.010148001100}, + { -0.001199255200, -0.002303988400, 0.002994947800, -0.001172180300, + -0.002403060700, 0.008854931500, -0.022070359700, 0.066040074000, + 0.295894792100, -0.055414282300, 0.030848876200, -0.020731638800, + 0.015285837100, -0.012331974000, 0.011286215600, -0.010212808300}, + { -0.001200034300, -0.002300011200, 0.002996071300, -0.001175466100, + -0.002404798200, 0.008866742900, -0.022131427200, 0.066328312100, + 0.295807177000, -0.055667568800, 0.031043216800, -0.020877200500, + 0.015401999100, -0.012423245400, 0.011365248300, -0.010290103500}, + { -0.001205321400, -0.002302348400, 0.002994763100, -0.001184119100, + -0.002394208300, 0.008871703300, -0.022183982500, 0.066622310400, + 0.295714067400, -0.055930893500, 0.031228731500, -0.021030519400, + 0.015521915600, -0.012520884000, 0.011440787900, -0.010353664500}, + { -0.001207424300, -0.002292788000, 0.002999278000, -0.001198036500, + -0.002390705100, 0.008878791700, -0.022237804200, 0.066918218000, + 0.295617466000, -0.056188447800, 0.031421091300, -0.021185866100, + 0.015643456400, -0.012619826700, 0.011517277400, -0.010418126800}, + { -0.001212436700, -0.002296230600, 0.003002379300, -0.001199058000, + -0.002383054500, 0.008886317900, -0.022295584100, 0.067206178800, + 0.295523834300, -0.056451724100, 0.031610777900, -0.021331865900, + 0.015760653300, -0.012712731400, 0.011600637200, -0.010484290500}, + { -0.001214616000, -0.002286436600, 0.003006011700, -0.001208867400, + -0.002371663200, 0.008890402300, -0.022347745200, 0.067505660900, + 0.295433201300, -0.056710441700, 0.031804218000, -0.021485136600, + 0.015891596000, -0.012815991100, 0.011680400300, -0.010551509800}, + { -0.001219729000, 
-0.002290053700, 0.003010061800, -0.001214114500, + -0.002371512600, 0.008900068000, -0.022406367600, 0.067795719500, + 0.295335292700, -0.056969680800, 0.031988588100, -0.021638667600, + 0.016012759800, -0.012915294700, 0.011757803200, -0.010616490000}, + { -0.001221768100, -0.002280163100, 0.003013484800, -0.001223949400, + -0.002359880400, 0.008903658200, -0.022457194700, 0.068091568200, + 0.295234898600, -0.057225248100, 0.032185366300, -0.021787511700, + 0.016132334200, -0.013010433300, 0.011843615700, -0.010688812000}, + { -0.001222885000, -0.002274839200, 0.003009550000, -0.001230956100, + -0.002350338500, 0.008909637200, -0.022513896200, 0.068385169700, + 0.295142943400, -0.057486437700, 0.032372518000, -0.021943625600, + 0.016255565900, -0.013111568500, 0.011922721200, -0.010755431700}, + { -0.001228791200, -0.002274200700, 0.003021279300, -0.001238353400, + -0.002352971000, 0.008913972000, -0.022561101300, 0.068676416600, + 0.295047141500, -0.057751817000, 0.032570797200, -0.022105269700, + 0.016383459600, -0.013216314100, 0.012004282000, -0.010824042300}, + { -0.001229834600, -0.002268726500, 0.003017029900, -0.001245178500, + -0.002343674400, 0.008919626500, -0.022616489300, 0.068966542100, + 0.294944915100, -0.058008634000, 0.032758671600, -0.022247384700, + 0.016509174100, -0.013312472500, 0.012086623000, -0.010900887700}, + { -0.001236321600, -0.002266647300, 0.003023412200, -0.001256945700, + -0.002330427000, 0.008922281200, -0.022670102300, 0.069259760100, + 0.294849437700, -0.058263509000, 0.032953544400, -0.022407065400, + 0.016635331800, -0.013416137200, 0.012167754700, -0.010969358900}, + { -0.001237037300, -0.002262428500, 0.003023848100, -0.001256128400, + -0.002324464800, 0.008933910800, -0.022719357000, 0.069553175900, + 0.294752983200, -0.058521997300, 0.033144550900, -0.022556496100, + 0.016757477900, -0.013517704400, 0.012247285200, -0.011036494600}, + { -0.001243631400, -0.002260491700, 0.003031143500, -0.001272367300, + 
-0.002318660300, 0.008938942400, -0.022774221100, 0.069848353600, + 0.294654140800, -0.058774188400, 0.033338099000, -0.022715294600, + 0.016882921900, -0.013617730300, 0.012337731100, -0.011112979600}, + { -0.001244616700, -0.002255007200, 0.003026834400, -0.001278495600, + -0.002312971700, 0.008936004500, -0.022826323700, 0.070143346800, + 0.294550501800, -0.059037642400, 0.033527611400, -0.022874644300, + 0.017009920100, -0.013722251500, 0.012419958100, -0.011182377900}, + { -0.001249868500, -0.002258640400, 0.003030359000, -0.001279931900, + -0.002305315700, 0.008946945700, -0.022879051900, 0.070434097200, + 0.294460157000, -0.059293518700, 0.033729073000, -0.023029828700, + 0.017136721300, -0.013827892600, 0.012503092200, -0.011252540100}, + { -0.001252011800, -0.002248666800, 0.003033725600, -0.001289637300, + -0.002293460500, 0.008949854300, -0.022931857800, 0.070729691900, + 0.294357625700, -0.059546703200, 0.033913734300, -0.023182283200, + 0.017269467600, -0.013934406600, 0.012586727200, -0.011323121700}, + { -0.001257327400, -0.002251340000, 0.003033306900, -0.001302187900, + -0.002294842900, 0.008953591700, -0.022981051800, 0.071016115000, + 0.294253594500, -0.059806163900, 0.034116321500, -0.023338009400, + 0.017396575200, -0.014036439400, 0.012678155200, -0.011396549400}, + { -0.001259366100, -0.002242376300, 0.003041364800, -0.001304581400, + -0.002285404200, 0.008958134900, -0.023035573000, 0.071318036600, + 0.294155332000, -0.060059835200, 0.034302863600, -0.023496551600, + 0.017523724800, -0.014142299300, 0.012762833200, -0.011472239700}, + { -0.001264793200, -0.002244873300, 0.003040255700, -0.001313420800, + -0.002274742400, 0.008966178100, -0.023084721700, 0.071604519400, + 0.294053570900, -0.060314068800, 0.034495386400, -0.023649793700, + 0.017653808300, -0.014239964400, 0.012843156400, -0.011540516700}, + { -0.001266776300, -0.002234895800, 0.003043324900, -0.001322245300, + -0.002267675400, 0.008965455900, -0.023127290700, 0.071902922400, + 
0.293950616300, -0.060571265600, 0.034693878200, -0.023814273900, + 0.017784544000, -0.014343601700, 0.012932068300, -0.011623815900}, + { -0.001272408000, -0.002238111400, 0.003046799700, -0.001323837000, + -0.002258841200, 0.008970913900, -0.023185828700, 0.072198731500, + 0.293850684700, -0.060823698800, 0.034885265900, -0.023966290300, + 0.017910112100, -0.014448925000, 0.013015910500, -0.011694882200}, + { -0.001274483600, -0.002228201400, 0.003050212200, -0.001333772300, + -0.002247134700, 0.008978152700, -0.023234912400, 0.072492005600, + 0.293746271400, -0.061079622500, 0.035084175400, -0.024131925000, + 0.018043115400, -0.014559744600, 0.013103666900, -0.011769115200}, + { -0.001275293500, -0.002223339400, 0.003046802300, -0.001344236100, + -0.002249832100, 0.008982191000, -0.023282772800, 0.072780111500, + 0.293638750200, -0.061329463600, 0.035275317400, -0.024284154200, + 0.018169987600, -0.014666643200, 0.013189357000, -0.011845461600}, + { -0.001282008300, -0.002220878800, 0.003053154500, -0.001356054900, + -0.002236664600, 0.008988073400, -0.023330451200, 0.073072951600, + 0.293533527800, -0.061588331200, 0.035466290700, -0.024446846900, + 0.018300582900, -0.014771995300, 0.013284279500, -0.011922344800}, + { -0.001282913700, -0.002216603000, 0.003053533900, -0.001354819600, + -0.002234495000, 0.008991178300, -0.023379395200, 0.073372194300, + 0.293431988400, -0.061843667600, 0.035669418700, -0.024601980400, + 0.018440114900, -0.014885760000, 0.013374412100, -0.011998777300}, + { -0.001289852900, -0.002213970900, 0.003059661400, -0.001366666100, + -0.002220358400, 0.008991998000, -0.023433472500, 0.073667719600, + 0.293326498300, -0.062091368600, 0.035856112000, -0.024762400200, + 0.018570057000, -0.014994590500, 0.013461432300, -0.012072689000}, + { -0.001290828100, -0.002208668300, 0.003055268400, -0.001372665000, + -0.002215250000, 0.008992759700, -0.023479470700, 0.073961694600, + 0.293216599600, -0.062347556700, 0.036051519300, -0.024918766800, + 
0.018700312600, -0.015104288800, 0.013548919900, -0.012146903000}, + { -0.001297565000, -0.002206188900, 0.003061515200, -0.001384640400, + -0.002201711800, 0.008998523600, -0.023530332000, 0.074248575600, + 0.293108050300, -0.062599496300, 0.036250520900, -0.025085987100, + 0.018835062100, -0.015213765300, 0.013648384700, -0.012231594800}, + { -0.001298486200, -0.002201990500, 0.003061911500, -0.001383193600, + -0.002199540200, 0.009001070400, -0.023577987000, 0.074548400000, + 0.293006277700, -0.062848029800, 0.036443834300, -0.025242884300, + 0.018970177400, -0.015316494200, 0.013733603800, -0.012304800000}, + { -0.001305173000, -0.002199287800, 0.003067771800, -0.001393998700, + -0.002190657800, 0.008998337100, -0.023620522900, 0.074841359700, + 0.292893802300, -0.063101285300, 0.036634397500, -0.025407276700, + 0.019104315500, -0.015429059800, 0.013823584100, -0.012381206600}, + { -0.001306486000, -0.002193998000, 0.003063717500, -0.001401081300, + -0.002180765400, 0.009007040500, -0.023673990600, 0.075136252200, + 0.292786821600, -0.063356530800, 0.036831074500, -0.025565504100, + 0.019236546600, -0.015541317500, 0.013913853300, -0.012457823800}, + { -0.001313321900, -0.002191380800, 0.003069674600, -0.001412157700, + -0.002171714300, 0.009004937300, -0.023721555300, 0.075427591700, + 0.292680753800, -0.063607547900, 0.037031767000, -0.025735481500, + 0.019375159900, -0.015657767200, 0.014007257400, -0.012537078000}, + { -0.001314354900, -0.002186268300, 0.003065150800, -0.001417698000, + -0.002167557500, 0.009010001600, -0.023759323400, 0.075724712100, + 0.292576319400, -0.063853917500, 0.037225319700, -0.025892093800, + 0.019506037600, -0.015764904400, 0.014104155900, -0.012616476800}, + { -0.001321218400, -0.002184458600, 0.003076233500, -0.001422547300, + -0.002156670200, 0.009017151300, -0.023810871100, 0.076019700600, + 0.292466530500, -0.064106762900, 0.037421391100, -0.026050975100, + 0.019639469300, -0.015878862800, 0.014196955700, -0.012699499300}, 
+ { -0.001322405100, -0.002179145600, 0.003071783900, -0.001428408900, + -0.002151344500, 0.009017528200, -0.023860018100, 0.076314227000, + 0.292354833000, -0.064357589600, 0.037613307800, -0.026218842900, + 0.019781456700, -0.015987370400, 0.014287400700, -0.012776898100}, + { -0.001329411400, -0.002176246500, 0.003077588100, -0.001439416300, + -0.002142422000, 0.009015226300, -0.023906702500, 0.076607212500, + 0.292243989500, -0.064609585900, 0.037810235300, -0.026378597000, + 0.019915634000, -0.016100609100, 0.014374613700, -0.012862479300}, + { -0.001331821400, -0.002165817700, 0.003080409800, -0.001449188900, + -0.002130265600, 0.009021448800, -0.023957398700, 0.076903557900, + 0.292129090700, -0.064858733100, 0.038005130000, -0.026536730800, + 0.020047933100, -0.016209483700, 0.014473806100, -0.012943770500}, + { -0.001332912300, -0.002160549700, 0.003075666900, -0.001454699900, + -0.002125747900, 0.009026360900, -0.023998381000, 0.077196425000, + 0.292014040700, -0.065102449400, 0.038203933500, -0.026707041800, + 0.020188140300, -0.016328233200, 0.014569262800, -0.013025077600}, + { -0.001339767200, -0.002158494200, 0.003086316400, -0.001458535300, + -0.002119522600, 0.009025408000, -0.024045414700, 0.077491289100, + 0.291898436700, -0.065350608700, 0.038399388600, -0.026866946500, + 0.020323488600, -0.016444050200, 0.014663919000, -0.013110011800}, + { -0.001341225900, -0.002153019500, 0.003081710900, -0.001464447200, + -0.002114130700, 0.009025651700, -0.024094239400, 0.077792721000, + 0.291786795400, -0.065599338300, 0.038596149400, -0.027027766200, + 0.020459538900, -0.016560228700, 0.014758176300, -0.013190641200}, + { -0.001348116400, -0.002150215500, 0.003087524600, -0.001475208500, + -0.002105937200, 0.009027973200, -0.024136747400, 0.078077054200, + 0.291673553600, -0.065848400700, 0.038789809200, -0.027198631300, + 0.020605326600, -0.016672927800, 0.014852276800, -0.013270943900}, + { -0.001349842500, -0.002144524900, 0.003082911700, 
-0.001481921800, + -0.002095570400, 0.009035188200, -0.024188073400, 0.078381474100, + 0.291557196100, -0.066093324400, 0.038984672800, -0.027358305900, + 0.020740566100, -0.016789045200, 0.014946715800, -0.013351892900}, + { -0.001357082800, -0.002141367000, 0.003088470800, -0.001492702300, + -0.002087255200, 0.009036993800, -0.024226476800, 0.078678890800, + 0.291445410400, -0.066341740000, 0.039182293600, -0.027520351800, + 0.020877384400, -0.016902143900, 0.015049535100, -0.013436720300}, + { -0.001358430400, -0.002135814900, 0.003082938500, -0.001493462900, + -0.002074639200, 0.009034254800, -0.024276630700, 0.078972791400, + 0.291332516600, -0.066588317200, 0.039375542700, -0.027690881700, + 0.021019023100, -0.017023199600, 0.015149195700, -0.013526024300}, + { -0.001365518600, -0.002132688700, 0.003088416900, -0.001504139700, + -0.002066451500, 0.009036431600, -0.024318259900, 0.079258669800, + 0.291213669600, -0.066833494600, 0.039572395400, -0.027853162500, + 0.021157264500, -0.017142104300, 0.015245837400, -0.013608286800}, + { -0.001366787300, -0.002128108700, 0.003088564100, -0.001502425900, + -0.002064956100, 0.009042893100, -0.024359858000, 0.079560972200, + 0.291093788100, -0.067075438600, 0.039767446600, -0.028015676300, + 0.021300179900, -0.017253688600, 0.015339335300, -0.013689059900}, + { -0.001374342300, -0.002124476100, 0.003093955600, -0.001513509600, + -0.002055553100, 0.009039889200, -0.024409237700, 0.079856769100, + 0.290971758300, -0.067326482200, 0.039967168700, -0.028180320800, + 0.021440417100, -0.017374226900, 0.015437926100, -0.013773229200}, + { -0.001376849300, -0.002113994500, 0.003096377800, -0.001522077700, + -0.002048697200, 0.009043386000, -0.024453193600, 0.080155270500, + 0.290861113600, -0.067572563200, 0.040162405000, -0.028357551400, + 0.021577567700, -0.017494757400, 0.015536744300, -0.013857978600}, + { -0.001383359700, -0.002115681000, 0.003094623000, -0.001529706600, + -0.002042499600, 0.009046978300, 
-0.024496193000, 0.080448884900, + 0.290742857900, -0.067815030400, 0.040359106100, -0.028520619600, + 0.021716480000, -0.017610720400, 0.015643569600, -0.013950756900}, + { -0.001385829000, -0.002104702000, 0.003095698800, -0.001533174400, + -0.002027861900, 0.009041815800, -0.024543547500, 0.080744453800, + 0.290621547300, -0.068055928700, 0.040555731500, -0.028685336800, + 0.021861411400, -0.017724211200, 0.015739387800, -0.014033359700}, + { -0.001392505100, -0.002106039500, 0.003093740800, -0.001540744900, + -0.002021575300, 0.009045183800, -0.024586190700, 0.081040072300, + 0.290494347700, -0.068302308800, 0.040754474000, -0.028850387900, + 0.022002842700, -0.017846481100, 0.015839406100, -0.014118938600}, + { -0.001395190900, -0.002095226400, 0.003095699600, -0.001548946600, + -0.002014963700, 0.009048347000, -0.024629081800, 0.081340267800, + 0.290377478100, -0.068542945900, 0.040946718000, -0.029021815100, + 0.022146976500, -0.017970542200, 0.015941116300, -0.014206281900}, + { -0.001401882300, -0.002096680100, 0.003093691000, -0.001556387000, + -0.002008669700, 0.009051397000, -0.024670766100, 0.081635398000, + 0.290253164200, -0.068780397800, 0.041142095700, -0.029185104300, + 0.022287585300, -0.018092816300, 0.016042263500, -0.014297586900}, + { -0.001404564500, -0.002085806300, 0.003095765300, -0.001564993600, + -0.002001893100, 0.009055027000, -0.024718526100, 0.081930443400, + 0.290131292800, -0.069028755000, 0.041343948500, -0.029354488900, + 0.022437490000, -0.018211358000, 0.016142525900, -0.014383790400}, + { -0.001411478900, -0.002086898100, 0.003092724100, -0.001567128600, + -0.001988704700, 0.009055155500, -0.024758694700, 0.082230235800, + 0.290011569200, -0.069267292000, 0.041541189800, -0.029519935000, + 0.022579662500, -0.018334760000, 0.016243913500, -0.014471061400}, + { -0.001414337900, -0.002075770200, 0.003094262100, -0.001575159300, + -0.001982046700, 0.009058174200, -0.024801087800, 0.082532448100, + 0.289888685100, -0.069503998900, 
0.041737590700, -0.029689051900, + 0.022713561300, -0.018449559000, 0.016349891900, -0.014559627800}, + { -0.001416907600, -0.002064831500, 0.003096053600, -0.001582625700, + -0.001980678900, 0.009054717400, -0.024845070000, 0.082826383600, + 0.289764874500, -0.069755255200, 0.041932542200, -0.029856412200, + 0.022863023200, -0.018567891400, 0.016450370300, -0.014646034300}, + { -0.001423747700, -0.002066861800, 0.003098336700, -0.001577810900, + -0.001970699600, 0.009057517500, -0.024891262300, 0.083121682100, + 0.289643060400, -0.069991474400, 0.042129344000, -0.030022736600, + 0.023006696000, -0.018693430700, 0.016554974400, -0.014740487700}, + { -0.001426747600, -0.002055331700, 0.003099557300, -0.001585752700, + -0.001964069200, 0.009060147700, -0.024932767200, 0.083425985400, + 0.289510095600, -0.070230755300, 0.042326526200, -0.030188546200, + 0.023150063400, -0.018818329000, 0.016657963500, -0.014829329300}, + { -0.001434061400, -0.002056302800, 0.003097143300, -0.001592655700, + -0.001958070000, 0.009063562600, -0.024979066000, 0.083722905800, + 0.289383840200, -0.070462608200, 0.042516727200, -0.030361552500, + 0.023301552900, -0.018938392400, 0.016759937100, -0.014917519700}, + { -0.001436913900, -0.002044866400, 0.003097508000, -0.001595199500, + -0.001944654100, 0.009063774800, -0.025022964500, 0.084018453400, + 0.289257793800, -0.070696301300, 0.042714039500, -0.030533338900, + 0.023439043400, -0.019061283500, 0.016861988700, -0.015005498900}, + { -0.001445044100, -0.002040355800, 0.003101858000, -0.001604479400, + -0.001941756600, 0.009058701400, -0.025064553400, 0.084313384800, + 0.289127389400, -0.070941485100, 0.042906585900, -0.030698827600, + 0.023583328300, -0.019187203800, 0.016966497900, -0.015095192700}, + { -0.001447330000, -0.002034312100, 0.003096284800, -0.001609315800, + -0.001937412700, 0.009063501400, -0.025112304400, 0.084617972200, + 0.289002944200, -0.071173336500, 0.043102902900, -0.030866540700, + 0.023733865500, -0.019307909900, 
0.017070271400, -0.015190068200}, + { -0.001455518600, -0.002029502500, 0.003099574200, -0.001613820700, + -0.001923185400, 0.009067220900, -0.025147413900, 0.084911325500, + 0.288870320800, -0.071411887600, 0.043302468300, -0.031035919800, + 0.023880818700, -0.019436707600, 0.017176704600, -0.015281581500}, + { -0.001457861300, -0.002023130000, 0.003093622800, -0.001617413500, + -0.001924642300, 0.009065729600, -0.025192315000, 0.085215783300, + 0.288739140200, -0.071653858800, 0.043494252000, -0.031201529400, + 0.024025743900, -0.019563622700, 0.017282467800, -0.015372811900}, + { -0.001466497000, -0.002018075200, 0.003096710200, -0.001621883700, + -0.001909503200, 0.009063993200, -0.025234780000, 0.085517904200, + 0.288613216300, -0.071884560100, 0.043691563100, -0.031375546100, + 0.024169673500, -0.019676404700, 0.017390367400, -0.015464035900}, + { -0.001469685300, -0.002006309300, 0.003097408700, -0.001629128000, + -0.001904355000, 0.009073340000, -0.025278532500, 0.085809125600, + 0.288475887700, -0.072118967800, 0.043889313200, -0.031543979900, + 0.024316777100, -0.019805930700, 0.017497985500, -0.015556426100}, + { -0.001477449400, -0.002006818100, 0.003094518400, -0.001634855100, + -0.001903992100, 0.009069894600, -0.025320844800, 0.086112304400, + 0.288347603800, -0.072351635400, 0.044077625700, -0.031707875300, + 0.024461035300, -0.019933041100, 0.017605115800, -0.015654004700}, + { -0.001480767400, -0.001994653500, 0.003094108300, -0.001636592700, + -0.001891836000, 0.009074959300, -0.025357212300, 0.086414765800, + 0.288212217900, -0.072585572700, 0.044276425800, -0.031879037500, + 0.024614982500, -0.020057087700, 0.017710651600, -0.015745786000}, + { -0.001488858500, -0.001993991000, 0.003085609100, -0.001648979900, + -0.001888369000, 0.009070080400, -0.025403194900, 0.086711998100, + 0.288078851000, -0.072825294900, 0.044470365700, -0.032052144100, + 0.024755980300, -0.020183755000, 0.017817156300, -0.015837646000}, + { -0.001492305300, 
-0.001981877500, 0.003085015800, -0.001650306000, + -0.001876815400, 0.009076396600, -0.025444714700, 0.087007865500, + 0.287948850700, -0.073051883700, 0.044666232000, -0.032220613500, + 0.024903471000, -0.020314095600, 0.017925901000, -0.015931594800}, + { -0.001501262300, -0.001976310900, 0.003088203600, -0.001658952100, + -0.001873996900, 0.009070381000, -0.025484515600, 0.087312662900, + 0.287808844900, -0.073281372400, 0.044862930000, -0.032390709200, + 0.025057752300, -0.020438589400, 0.018033299200, -0.016030164800}, + { -0.001503969900, -0.001969379300, 0.003080858100, -0.001657013000, + -0.001864713500, 0.009078560600, -0.025527589100, 0.087611730400, + 0.287669639200, -0.073516228400, 0.045054475300, -0.032563221800, + 0.025198358100, -0.020565897500, 0.018140302000, -0.016122658700}, + { -0.001513327100, -0.001963597200, 0.003083968700, -0.001665725900, + -0.001862252200, 0.009073875600, -0.025573878100, 0.087916911500, + 0.287535419700, -0.073748515200, 0.045259152900, -0.032729171400, + 0.025351871200, -0.020690412400, 0.018247246600, -0.016215838300}, + { -0.001516114100, -0.001956698100, 0.003076434500, -0.001663416900, + -0.001853404200, 0.009082036700, -0.025616230100, 0.088215611900, + 0.287398826600, -0.073974168600, 0.045445650100, -0.032893481400, + 0.025497283800, -0.020820608400, 0.018361514000, -0.016303461100}, + { -0.001525545100, -0.001950784000, 0.003079287600, -0.001671519700, + -0.001851968500, 0.009083077500, -0.025655699800, 0.088518791800, + 0.287263294400, -0.074204448300, 0.045645401700, -0.033070898400, + 0.025642665600, -0.020951717000, 0.018471685500, -0.016399128000}, + { -0.001529263800, -0.001938044200, 0.003078214900, -0.001672419000, + -0.001840528300, 0.009088750200, -0.025695953100, 0.088817903900, + 0.287119707900, -0.074435467900, 0.045835202700, -0.033239161900, + 0.025796245500, -0.021077164200, 0.018580469200, -0.016498716200}, + { -0.001538275800, -0.001937151500, 0.003074065100, -0.001676983800, + 
-0.001841296500, 0.009086549100, -0.025743532000, 0.089126683900, + 0.286977836300, -0.074664683000, 0.046023517900, -0.033404937000, + 0.025943420100, -0.021207672000, 0.018690756000, -0.016594569100}, + { -0.001542197500, -0.001924344300, 0.003072425300, -0.001676580800, + -0.001835675000, 0.009085971800, -0.025781349400, 0.089429913200, + 0.286843179800, -0.074884840400, 0.046219477100, -0.033581867500, + 0.026093533900, -0.021331826000, 0.018797704300, -0.016688282800}, + { -0.001546377000, -0.001910937800, 0.003070488600, -0.001676528000, + -0.001825134600, 0.009092452800, -0.025821879500, 0.089731283500, + 0.286693376000, -0.075110245000, 0.046405855900, -0.033746731200, + 0.026239733700, -0.021462324800, 0.018907663300, -0.016783704800}, + { -0.001555232200, -0.001910366600, 0.003066530900, -0.001681411600, + -0.001826242300, 0.009095852900, -0.025867448300, 0.090031502000, + 0.286552421600, -0.075335881700, 0.046610354100, -0.033919719800, + 0.026389844800, -0.021587898200, 0.019017118400, -0.016884247400}, + { -0.001559561500, -0.001896839700, 0.003064309200, -0.001680332600, + -0.001821529400, 0.009096027200, -0.025905423200, 0.090337279400, + 0.286407594900, -0.075562967900, 0.046799004200, -0.034087052200, + 0.026538690300, -0.021720457700, 0.019128983200, -0.016981596000}, + { -0.001569153000, -0.001894703800, 0.003054176700, -0.001691600600, + -0.001814795900, 0.009104248700, -0.025952532400, 0.090639789400, + 0.286262228400, -0.075788745300, 0.046987841400, -0.034255105000, + 0.026693327400, -0.021847402400, 0.019238909500, -0.017077386900}, + { -0.001573670100, -0.001880890400, 0.003051424200, -0.001690153500, + -0.001810064900, 0.009104300700, -0.025990224500, 0.090946916500, + 0.286112971700, -0.076007137400, 0.047182370700, -0.034430712700, + 0.026838198900, -0.021978862300, 0.019350009900, -0.017174506000}, + { -0.001583828300, -0.001873935800, 0.003052700600, -0.001692015100, + -0.001803039800, 0.009103004000, -0.026032138000, 0.091251439600, + 
0.285975563800, -0.076238222500, 0.047381154500, -0.034597273100, + 0.026994361100, -0.022107866200, 0.019462639700, -0.017278016500}, + { -0.001587867000, -0.001865780400, 0.003044301800, -0.001692821000, + -0.001807759500, 0.009109488100, -0.026079677000, 0.091556728800, + 0.285823700200, -0.076458633000, 0.047567511000, -0.034768617100, + 0.027136848800, -0.022237957200, 0.019573449500, -0.017374543300}, + { -0.001598323500, -0.001858063000, 0.003044500800, -0.001693384100, + -0.001801618700, 0.009107820400, -0.026115241800, 0.091863686900, + 0.285671811500, -0.076679031600, 0.047753629900, -0.034936118500, + 0.027291613500, -0.022364869500, 0.019683203200, -0.017470863500}, + { -0.001603558400, -0.001843678200, 0.003041601400, -0.001692544800, + -0.001791780100, 0.009115656500, -0.026162495500, 0.092174773000, + 0.285524247500, -0.076901860200, 0.047947719600, -0.035103343800, + 0.027432475300, -0.022494731800, 0.019794038100, -0.017567749800}, + { -0.001613422400, -0.001841112200, 0.003030366000, -0.001701864700, + -0.001792125700, 0.009123179000, -0.026199049700, 0.092474868600, + 0.285372738700, -0.077116537300, 0.048141797500, -0.035275280700, + 0.027591688900, -0.022626375700, 0.019908421800, -0.017673081200}, + { -0.001618684700, -0.001826555800, 0.003026957800, -0.001699564500, + -0.001788644700, 0.009124739900, -0.026242686200, 0.092784329800, + 0.285223614300, -0.077336969100, 0.048328815000, -0.035443127000, + 0.027742539400, -0.022762632500, 0.020029767800, -0.017767797800}, + { -0.001629703000, -0.001818503700, 0.003027010500, -0.001700124000, + -0.001782837800, 0.009124974400, -0.026290376100, 0.093086772800, + 0.285077698000, -0.077560338000, 0.048525355700, -0.035614488000, + 0.027892719100, -0.022889140600, 0.020140353900, -0.017864500900}, + { -0.001634392700, -0.001809252200, 0.003016455700, -0.001693615300, + -0.001782780900, 0.009129169500, -0.026335334700, 0.093393907400, + 0.284916529600, -0.077771874700, 0.048706755700, -0.035778114400, + 
0.028040064200, -0.023021819100, 0.020253539400, -0.017963440300}, + { -0.001645793200, -0.001800824700, 0.003016678700, -0.001699031600, + -0.001784607900, 0.009137987500, -0.026373328800, 0.093702911100, + 0.284764041400, -0.077988760200, 0.048893523500, -0.035952564600, + 0.028191490700, -0.023149218600, 0.020364436200, -0.018061008600}, + { -0.001651378600, -0.001785770200, 0.003012636900, -0.001696266500, + -0.001781460200, 0.009139701600, -0.026416553200, 0.094014760000, + 0.284608657200, -0.078204266000, 0.049084885500, -0.036119295000, + 0.028333282800, -0.023280404100, 0.020477739000, -0.018165847100}, + { -0.001662265200, -0.001781999900, 0.002999487700, -0.001698314300, + -0.001777143500, 0.009146283300, -0.026457830700, 0.094316696700, + 0.284457324700, -0.078421164800, 0.049272679100, -0.036289687800, + 0.028492007500, -0.023412213700, 0.020592415900, -0.018266186600}, + { -0.001668313800, -0.001766316400, 0.002994642500, -0.001694643800, + -0.001774906500, 0.009148770700, -0.026501577500, 0.094630560000, + 0.284297024000, -0.078632258000, 0.049461377000, -0.036454574300, + 0.028632330500, -0.023541974700, 0.020703740000, -0.018364139000}, + { -0.001680418200, -0.001756770400, 0.002992954400, -0.001693172900, + -0.001771057900, 0.009150120900, -0.026549370400, 0.094937302500, + 0.284139414500, -0.078843755900, 0.049644890900, -0.036622085000, + 0.028788840800, -0.023671874000, 0.020817422100, -0.018463951300}, + { -0.001686115500, -0.001746931600, 0.002982552200, -0.001691727400, + -0.001778570200, 0.009164052800, -0.026591723800, 0.095258665900, + 0.283981176300, -0.079052887900, 0.049827569900, -0.036793287300, + 0.028933443100, -0.023805663500, 0.020933121700, -0.018571534300}, + { -0.001698209200, -0.001736659300, 0.002974894800, -0.001696435000, + -0.001772231300, 0.009168743300, -0.026630607200, 0.095561100700, + 0.283824708400, -0.079266196800, 0.050019918900, -0.036957408700, + 0.029090435800, -0.023942524200, 0.021041132500, -0.018668595300}, 
+ { -0.001704658800, -0.001720493300, 0.002969448100, -0.001691965100, + -0.001770702400, 0.009171925300, -0.026679621900, 0.095870802000, + 0.283660491100, -0.079472717100, 0.050201664600, -0.037129754000, + 0.029241121700, -0.024070689600, 0.021159356200, -0.018761561600}, + { -0.001717442700, -0.001710235900, 0.002967018600, -0.001689724600, + -0.001768308200, 0.009180232100, -0.026722407800, 0.096183264500, + 0.283504106100, -0.079683587600, 0.050392415300, -0.037297550600, + 0.029384739600, -0.024204158900, 0.021274150600, -0.018862478100}, + { -0.001723295500, -0.001699869700, 0.002955079600, -0.001681516300, + -0.001770832400, 0.009192163800, -0.026767354800, 0.096492955100, + 0.283337590700, -0.079887593500, 0.050578218900, -0.037457346700, + 0.029538083300, -0.024333413500, 0.021388132400, -0.018968488000}, + { -0.001736577200, -0.001687977400, 0.002945624400, -0.001683731000, + -0.001771162500, 0.009186304800, -0.026811221400, 0.096806832700, + 0.283171957600, -0.080100778400, 0.050764840500, -0.037632549500, + 0.029686120900, -0.024469290200, 0.021505473200, -0.019071619200}, + { -0.001743866400, -0.001670904800, 0.002939243500, -0.001678044700, + -0.001772074700, 0.009197158400, -0.026855831200, 0.097122904600, + 0.283008922400, -0.080311308100, 0.050945404700, -0.037790101700, + 0.029837934200, -0.024597228000, 0.021617945900, -0.019170951200}, + { -0.001756624600, -0.001665754600, 0.002930209300, -0.001677015500, + -0.001779605600, 0.009211401200, -0.026902305100, 0.097436064800, + 0.282834989500, -0.080507440900, 0.051121120600, -0.037958156200, + 0.029986133000, -0.024723351000, 0.021728932000, -0.019269257400}, + { -0.001764156100, -0.001648349000, 0.002923256000, -0.001670895300, + -0.001780604500, 0.009222427700, -0.026946759500, 0.097753369800, + 0.282668132600, -0.080708954700, 0.051306676900, -0.038122909500, + 0.030128070300, -0.024856282600, 0.021844654800, -0.019377155600}, + { -0.001778008400, -0.001635839700, 0.002912741900, 
-0.001671632200, + -0.001783286000, 0.009223573800, -0.026984567000, 0.098066057000, + 0.282503350600, -0.080910856000, 0.051487224000, -0.038289759200, + 0.030286004000, -0.024988398200, 0.021960407900, -0.019479635100}, + { -0.001785075000, -0.001624082400, 0.002899683900, -0.001662356200, + -0.001786186700, 0.009231168400, -0.027042545900, 0.098383808800, + 0.282326299200, -0.081120147400, 0.051668545300, -0.038454440300, + 0.030434707800, -0.025122240000, 0.022073795700, -0.019570606700}, + { -0.001799471600, -0.001611646700, 0.002894723100, -0.001657398400, + -0.001786417100, 0.009241521900, -0.027086143300, 0.098702548800, + 0.282153907600, -0.081315971000, 0.051850662000, -0.038616812900, + 0.030574773900, -0.025253455800, 0.022187350300, -0.019671249500}, + { -0.001808127200, -0.001592362400, 0.002880377400, -0.001655707600, + -0.001785903000, 0.009251959300, -0.027135898100, 0.099021663300, + 0.281989192800, -0.081515568600, 0.052030462600, -0.038783583100, + 0.030733042100, -0.025387269800, 0.022306311600, -0.019782364100}, + { -0.001822507700, -0.001579820600, 0.002875237500, -0.001649711300, + -0.001792635900, 0.009256393500, -0.027175974500, 0.099338786400, + 0.281816351400, -0.081716496200, 0.052207335300, -0.038945254600, + 0.030878936400, -0.025512202200, 0.022416781800, -0.019880568400}, + { -0.001830818000, -0.001566710200, 0.002860056400, -0.001637727000, + -0.001799068700, 0.009272886300, -0.027230252500, 0.099657892600, + 0.281637176300, -0.081905722200, 0.052385802800, -0.039105374000, + 0.031017613200, -0.025642740200, 0.022530400400, -0.019981165400}, + { -0.001845760100, -0.001552740800, 0.002847881000, -0.001636760100, + -0.001804192200, 0.009282507800, -0.027269207300, 0.099967050900, + 0.281460005200, -0.082107814900, 0.052574179500, -0.039274706000, + 0.031169977600, -0.025773553200, 0.022645343300, -0.020082641400}, + { -0.001855192900, -0.001533110500, 0.002838490600, -0.001627660100, + -0.001808517000, 0.009296869200, 
-0.027321921500, 0.100291413500, + 0.281286138200, -0.082304345000, 0.052741885600, -0.039434316600, + 0.031323202100, -0.025903756400, 0.022761936700, -0.020192073400}, + { -0.001869926500, -0.001525588000, 0.002826213600, -0.001617884900, + -0.001812957700, 0.009311303900, -0.027373640000, 0.100609916000, + 0.281103932300, -0.082490397700, 0.052919415400, -0.039594468800, + 0.031463335700, -0.026042393200, 0.022877455000, -0.020285435000}, + { -0.001879711300, -0.001504548000, 0.002809677500, -0.001613216100, + -0.001820868000, 0.009318026600, -0.027421788700, 0.100932867600, + 0.280922614500, -0.082692676700, 0.053098254300, -0.039758154000, + 0.031611536400, -0.026169842500, 0.022990763600, -0.020386038100}, + { -0.001896148500, -0.001489715900, 0.002802122100, -0.001605491000, + -0.001823983800, 0.009331442300, -0.027473598000, 0.101258880600, + 0.280742505700, -0.082878251100, 0.053275619900, -0.039919465700, + 0.031757335000, -0.026295748600, 0.023102669400, -0.020486105200}, + { -0.001906217800, -0.001469185600, 0.002791306400, -0.001593896400, + -0.001837111200, 0.009347882600, -0.027518324200, 0.101581276800, + 0.280564653200, -0.083070602600, 0.053441716200, -0.040078628800, + 0.031911263600, -0.026426870800, 0.023219719400, -0.020596163500}, + { -0.001922140900, -0.001459273400, 0.002770198900, -0.001581275300, + -0.001835136600, 0.009358983900, -0.027567373500, 0.101900357800, + 0.280377353300, -0.083256870000, 0.053610471900, -0.040233651400, + 0.032046118400, -0.026554662000, 0.023331738800, -0.020695687800}, + { -0.001933077600, -0.001437420100, 0.002757857900, -0.001568643300, + -0.001848435300, 0.009370146400, -0.027619025000, 0.102229591200, + 0.280184129200, -0.083442004400, 0.053787235200, -0.040395032300, + 0.032192706400, -0.026681026200, 0.023444081100, -0.020796180800}, + { -0.001950196300, -0.001421126700, 0.002743236000, -0.001565501700, + -0.001850389700, 0.009389127400, -0.027669483100, 0.102547554800, + 0.280004451900, -0.083632987700, 
0.053961014100, -0.040555637300, + 0.032339646100, -0.026814717500, 0.023552023700, -0.020894037400}, + { -0.001960990100, -0.001399562800, 0.002731039900, -0.001552949300, + -0.001863555500, 0.009399847000, -0.027719967200, 0.102875847200, + 0.279815921900, -0.083810986900, 0.054134314800, -0.040714985700, + 0.032485359700, -0.026941818000, 0.023672059200, -0.020996580900}, + { -0.001978335000, -0.001388824500, 0.002715202100, -0.001538161700, + -0.001879709700, 0.009419594300, -0.027767473800, 0.103205041100, + 0.279619234800, -0.083997214900, 0.054302452500, -0.040870290800, + 0.032621932300, -0.027071437200, 0.023785545000, -0.021097697500}, + { -0.001990835900, -0.001364801200, 0.002695060500, -0.001530065500, + -0.001885898300, 0.009436950100, -0.027828661600, 0.103538923200, + 0.279432441200, -0.084179631800, 0.054468678800, -0.041018806300, + 0.032768596100, -0.027197497600, 0.023898644100, -0.021198957800}, + { -0.002009220500, -0.001347505300, 0.002684579000, -0.001518047500, + -0.001900093500, 0.009455425900, -0.027880462700, 0.103861290100, + 0.279243346600, -0.084361255500, 0.054635449300, -0.041174711300, + 0.032911712700, -0.027321991400, 0.024009912800, -0.021298248700}, + { -0.002021097800, -0.001323447400, 0.002663196100, -0.001502518600, + -0.001906698600, 0.009462422300, -0.027927521300, 0.104190543900, + 0.279042526200, -0.084538637100, 0.054809534600, -0.041335183200, + 0.033058704200, -0.027455391500, 0.024118477100, -0.021403770100}, + { -0.002040050600, -0.001311257700, 0.002645934000, -0.001486507400, + -0.001918547800, 0.009490451500, -0.027986541700, 0.104526215400, + 0.278849784900, -0.084713840400, 0.054965250700, -0.041491644800, + 0.033194974400, -0.027586029200, 0.024240038400, -0.021501456000}, + { -0.002053034800, -0.001286367400, 0.002624167700, -0.001475643600, + -0.001933711100, 0.009510206500, -0.028039339300, 0.104851735700, + 0.278653475900, -0.084889121800, 0.055128391400, -0.041644581200, + 0.033335865400, -0.027708847400, 
0.024350066900, -0.021599986800}, + { -0.002073264900, -0.001266673600, 0.002611261800, -0.001461338600, + -0.001949621100, 0.009524686500, -0.028099508700, 0.105189430300, + 0.278450786400, -0.085067297700, 0.055292010700, -0.041798158600, + 0.033477267300, -0.027831832100, 0.024460899000, -0.021699539700}, + { -0.002086264900, -0.001241753500, 0.002589582900, -0.001451556400, + -0.001957811700, 0.009549327200, -0.028154247100, 0.105517476900, + 0.278250526200, -0.085239563600, 0.055460095600, -0.041944857600, + 0.033615404000, -0.027960051300, 0.024565652100, -0.021801611400}, + { -0.002106926400, -0.001221321700, 0.002575063400, -0.001429511100, + -0.001970246700, 0.009568367500, -0.028207292800, 0.105852072900, + 0.278047516100, -0.085416983300, 0.055624216900, -0.042099685900, + 0.033757782300, -0.028084273300, 0.024677037500, -0.021901583000}, + { -0.002120617500, -0.001202056000, 0.002552535400, -0.001408007500, + -0.001993279700, 0.009595095600, -0.028266589600, 0.106192034600, + 0.277843916500, -0.085583072500, 0.055780937400, -0.042248483600, + 0.033896113600, -0.028206269200, 0.024793027300, -0.021995201800}, + { -0.002142396800, -0.001179865400, 0.002530754500, -0.001396393300, + -0.002009028700, 0.009610046500, -0.028326674100, 0.106531925400, + 0.277639650500, -0.085747792700, 0.055936365900, -0.042394273800, + 0.034025320000, -0.028330041100, 0.024903208900, -0.022094548100}, + { -0.002157159200, -0.001153285000, 0.002512274500, -0.001370844300, + -0.002024860800, 0.009632664400, -0.028388245500, 0.106864987700, + 0.277432448700, -0.085921130000, 0.056098971500, -0.042548622300, + 0.034168711000, -0.028462099000, 0.025011964800, -0.022199944300}, + { -0.002179375900, -0.001130552500, 0.002489758900, -0.001358263800, + -0.002042234800, 0.009654833100, -0.028443348800, 0.107203619900, + 0.277226300200, -0.086083834300, 0.056253954500, -0.042695656200, + 0.034305305000, -0.028581781400, 0.025119808500, -0.022297564100}, + { -0.002195061100, 
-0.001108669600, 0.002464213000, -0.001333423100, + -0.002068874300, 0.009685232500, -0.028506052900, 0.107550806200, + 0.277004327300, -0.086243310500, 0.056405162700, -0.042840028100, + 0.034439922500, -0.028700012900, 0.025226614100, -0.022394383100}, + { -0.002217436800, -0.001085689500, 0.002440979000, -0.001314845000, + -0.002074387600, 0.009710099800, -0.028567755500, 0.107884769000, + 0.276798805200, -0.086406171500, 0.056561115200, -0.042988680400, + 0.034577788700, -0.028822677500, 0.025344724600, -0.022496001600}, + { -0.002234052300, -0.001056977600, 0.002420334600, -0.001292564500, + -0.002099057700, 0.009738656200, -0.028628118700, 0.108230473600, + 0.276581678800, -0.086558899300, 0.056708946400, -0.043130672100, + 0.034711343700, -0.028946376900, 0.025445511400, -0.022589602700}, + { -0.002257986500, -0.001032008700, 0.002395334200, -0.001277542100, + -0.002119185700, 0.009764314400, -0.028692402200, 0.108569779600, + 0.276361694700, -0.086719885800, 0.056868813300, -0.043271529100, + 0.034844676000, -0.029064307100, 0.025552190100, -0.022685765900}, + { -0.002275798800, -0.001001252000, 0.002366059500, -0.001252260400, + -0.002137523800, 0.009790138000, -0.028757551100, 0.108916066600, + 0.276151572300, -0.086882727400, 0.057015156000, -0.043413931200, + 0.034977723200, -0.029181547900, 0.025659050200, -0.022782491400}, + { -0.002299519300, -0.000983084300, 0.002341848700, -0.001227918200, + -0.002164204600, 0.009821204200, -0.028826112600, 0.109260641300, + 0.275924294400, -0.087035380900, 0.057163011400, -0.043556341700, + 0.035112146900, -0.029306783100, 0.025762575100, -0.022884324300}, + { -0.002318342500, -0.000951184900, 0.002311491600, -0.001207356700, + -0.002190231800, 0.009858208200, -0.028883563000, 0.109614224000, + 0.275705340200, -0.087183966300, 0.057307956100, -0.043694747100, + 0.035236477300, -0.029428661900, 0.025877315200, -0.022977857000}, + { -0.002343785600, -0.000924936400, 0.002290080300, -0.001177431500, + 
-0.002211532800, 0.009886431100, -0.028949989000, 0.109957830700, + 0.275481985700, -0.087329639600, 0.057457528900, -0.043828523400, + 0.035363736500, -0.029541639900, 0.025980032600, -0.023071295000}, + { -0.002363396600, -0.000891896000, 0.002258762900, -0.001155997700, + -0.002237980900, 0.009918196800, -0.029019980500, 0.110312292600, + 0.275252475700, -0.087485388500, 0.057598214300, -0.043965976300, + 0.035494092700, -0.029663348400, 0.026080272800, -0.023164098500}, + { -0.002389152800, -0.000865190200, 0.002236651900, -0.001124397300, + -0.002266843400, 0.009941856300, -0.029083534200, 0.110654857700, + 0.275027780000, -0.087628679600, 0.057741079400, -0.044111852100, + 0.035620616000, -0.029776226200, 0.026184102500, -0.023265168600}, + { -0.002409751900, -0.000831096900, 0.002203852900, -0.001101449700, + -0.002295711000, 0.009982935500, -0.029157146400, 0.111004330000, + 0.274797647300, -0.087777694100, 0.057893310100, -0.044247467300, + 0.035750612400, -0.029893238400, 0.026296883500, -0.023356224600}, + { -0.002436583600, -0.000809275400, 0.002174932100, -0.001065376500, + -0.002322951600, 0.010016783300, -0.029228839400, 0.111361951300, + 0.274567466000, -0.087919930200, 0.058025057300, -0.044378805200, + 0.035875683000, -0.030010656200, 0.026393710400, -0.023446642300}, + { -0.002458072800, -0.000774072300, 0.002140548500, -0.001040420000, + -0.002353097400, 0.010052135800, -0.029301943900, 0.111721745400, + 0.274333594900, -0.088052895500, 0.058159209800, -0.044509951300, + 0.036000215000, -0.030121514300, 0.026496227300, -0.023547531800}, + { -0.002486798400, -0.000743082100, 0.002107901300, -0.001010435600, + -0.002377569500, 0.010090405600, -0.029366888100, 0.112076957700, + 0.274096422800, -0.088200445900, 0.058300810100, -0.044638462800, + 0.036123651800, -0.030232021100, 0.026596958900, -0.023639414500}, + { -0.002509178900, -0.000707489700, 0.002079209800, -0.000979178500, + -0.002412292800, 0.010130406600, -0.029450197300, 0.112436237300, + 
0.273857995500, -0.088329074400, 0.058431854000, -0.044767731000, + 0.036247749400, -0.030349857900, 0.026701008800, -0.023725485500}, + { -0.002538546200, -0.000675641700, 0.002045354700, -0.000947146600, + -0.002444944300, 0.010165016900, -0.029513195500, 0.112791388300, + 0.273618006800, -0.088473828000, 0.058572790800, -0.044902786100, + 0.036367072300, -0.030457501700, 0.026799344000, -0.023815374100}, + { -0.002562081700, -0.000638597900, 0.002015094700, -0.000913965300, + -0.002481943700, 0.010207398600, -0.029598886400, 0.113154412600, + 0.273374591600, -0.088603102200, 0.058693934100, -0.045025119300, + 0.036484791400, -0.030563119800, 0.026898455900, -0.023913083300}, + { -0.002592531600, -0.000611735800, 0.001974049600, -0.000877067200, + -0.002513122700, 0.010252711500, -0.029676327000, 0.113513193200, + 0.273125908800, -0.088732072900, 0.058832779800, -0.045150554000, + 0.036606443900, -0.030679000400, 0.026993402700, -0.024001590900}, + { -0.002617432700, -0.000572476600, 0.001934940900, -0.000846562800, + -0.002549947700, 0.010301351000, -0.029750846500, 0.113886584200, + 0.272881898500, -0.088859686000, 0.058953274300, -0.045271977200, + 0.036723589400, -0.030784892400, 0.027097712600, -0.024087333600}, + { -0.002648556200, -0.000539287700, 0.001905685000, -0.000806920200, + -0.002588198100, 0.010335223100, -0.029830210100, 0.114246195000, + 0.272637989000, -0.088988209700, 0.059082508900, -0.045398291800, + 0.036835959600, -0.030886948300, 0.027193422300, -0.024182088200}, + { -0.002675426700, -0.000497529300, 0.001863568800, -0.000773306300, + -0.002628339500, 0.010387943600, -0.029915068400, 0.114620841300, + 0.272380972500, -0.089107334300, 0.059206370700, -0.045521223000, + 0.036954725600, -0.031000132500, 0.027286742200, -0.024270058800}, + { -0.002708812100, -0.000461001300, 0.001824188800, -0.000735439500, + -0.002661448300, 0.010435538400, -0.029994677500, 0.114983831800, + 0.272129953900, -0.089228688900, 0.059329528700, -0.045634938700, + 
0.037065976700, -0.031100796100, 0.027379891900, -0.024355993600}, + { -0.002735490700, -0.000419849000, 0.001788462900, -0.000689215900, + -0.002707124200, 0.010482924400, -0.030075860600, 0.115356267200, + 0.271872904900, -0.089354351300, 0.059449185200, -0.045758252600, + 0.037192230500, -0.031210219100, 0.027478273400, -0.024437594700}, + { -0.002768719300, -0.000389873200, 0.001744376300, -0.000654868000, + -0.002747163000, 0.010528906800, -0.030164681700, 0.115727569700, + 0.271612337400, -0.089466219000, 0.059565965700, -0.045873231900, + 0.037295135500, -0.031304965400, 0.027568162200, -0.024528121700}, + { -0.002798079400, -0.000344868200, 0.001697477500, -0.000608943300, + -0.002794808000, 0.010579370800, -0.030249393200, 0.116105596800, + 0.271346326500, -0.089576437400, 0.059689822100, -0.045986571500, + 0.037406419500, -0.031405176100, 0.027660621000, -0.024614179100}, + { -0.002834574300, -0.000305400500, 0.001661742100, -0.000568717900, + -0.002840569000, 0.010637555700, -0.030338939200, 0.116487577100, + 0.271082832700, -0.089682984500, 0.059794346400, -0.046095149100, + 0.037513479000, -0.031508865800, 0.027746587900, -0.024696294000}, + { -0.002863090600, -0.000261546500, 0.001616196800, -0.000524060200, + -0.002886726400, 0.010686302700, -0.030421218200, 0.116863187300, + 0.270824597700, -0.089796024200, 0.059912731300, -0.046212603400, + 0.037619046000, -0.031606514800, 0.027845171900, -0.024784044100}, + { -0.002900989400, -0.000219487800, 0.001570599400, -0.000479303700, + -0.002927315300, 0.010742257600, -0.030515733500, 0.117240864200, + 0.270554278500, -0.089907403100, 0.060028342200, -0.046320155000, + 0.037725963100, -0.031710668200, 0.027931517600, -0.024865514300}, + { -0.002939110000, -0.000177806100, 0.001531519200, -0.000428383200, + -0.002978759900, 0.010796286200, -0.030603272900, 0.117623618000, + 0.270285855600, -0.090009416000, 0.060137232300, -0.046429802200, + 0.037824453100, -0.031801130600, 0.028016349500, -0.024945375600}, 
+ { -0.002971404300, -0.000129367200, 0.001481455800, -0.000385645100, + -0.003028924400, 0.010859951400, -0.030704739700, 0.118009163200, + 0.270004747300, -0.090110535200, 0.060237393200, -0.046534338000, + 0.037928400600, -0.031903279600, 0.028108476700, -0.025021832500}, + { -0.003010799000, -0.000085506800, 0.001433304900, -0.000337127500, + -0.003080051000, 0.010914101700, -0.030792208700, 0.118393292300, + 0.269732724800, -0.090208244500, 0.060342765300, -0.046633690800, + 0.038028249000, -0.031994340000, 0.028194618000, -0.025109965600}, + { -0.003043352800, -0.000043286100, 0.001376710400, -0.000282666000, + -0.003129872000, 0.010978749000, -0.030894968100, 0.118781343500, + 0.269453183200, -0.090299385000, 0.060444527700, -0.046738202500, + 0.038128681100, -0.032076686500, 0.028274348700, -0.025185554200}, + { -0.003084097600, 0.000001652200, 0.001334856800, -0.000235221900, + -0.003190341200, 0.011040895200, -0.030996156800, 0.119168850700, + 0.269167050700, -0.090401757000, 0.060542535700, -0.046832398400, + 0.038225851900, -0.032173379200, 0.028361984300, -0.025257897700}, + { -0.003118681600, 0.000052969800, 0.001280388200, -0.000180668100, + -0.003240720400, 0.011105920600, -0.031092550300, 0.119563660200, + 0.268880111900, -0.090484913100, 0.060629970500, -0.046933803100, + 0.038315834400, -0.032256880200, 0.028442350400, -0.025341820300}, + { -0.003162561900, 0.000102202900, 0.001226211900, -0.000125395300, + -0.003299906800, 0.011175918200, -0.031191167100, 0.119959312000, + 0.268593285800, -0.090577351800, 0.060731151800, -0.047029530200, + 0.038413339800, -0.032353226800, 0.028521763200, -0.025418085200}, + { -0.003199268400, 0.000156014300, 0.001168676900, -0.000066919400, + -0.003361438800, 0.011241308700, -0.031296168100, 0.120353034900, + 0.268302409700, -0.090656855500, 0.060822958600, -0.047124517500, + 0.038499397300, -0.032433214400, 0.028598264500, -0.025491004800}, + { -0.003242762700, 0.000204180800, 0.001122626500, -0.000008679700, + 
-0.003414828400, 0.011309574900, -0.031402432600, 0.120754514200, + 0.268015321100, -0.090745034900, 0.060911299100, -0.047212175500, + 0.038596766500, -0.032520166800, 0.028679119400, -0.025558748500}, + { -0.003280931700, 0.000259812600, 0.001063521800, 0.000044884500, + -0.003483800400, 0.011387738000, -0.031508074100, 0.121152150800, + 0.267711413800, -0.090822485700, 0.061000604600, -0.047304659400, + 0.038681603800, -0.032600073600, 0.028755960800, -0.025638962100}, + { -0.003327797000, 0.000312623600, 0.001004973300, 0.000105206400, + -0.003548028100, 0.011456238300, -0.031616336600, 0.121558234900, + 0.267415888400, -0.090894622700, 0.061086213100, -0.047387160100, + 0.038767569400, -0.032686245400, 0.028827467700, -0.025709333200}, + { -0.003367277200, 0.000369742900, 0.000943784400, 0.000167921300, + -0.003614337200, 0.011526657700, -0.031726469200, 0.121966313400, + 0.267117532200, -0.090972059800, 0.061166435400, -0.047473566900, + 0.038846825400, -0.032761339500, 0.028907556300, -0.025775412400}, + { -0.003415490600, 0.000424433200, 0.000883119400, 0.000230091300, + -0.003674451700, 0.011609993400, -0.031842624600, 0.122364892800, + 0.266807297400, -0.091041565400, 0.061249320900, -0.047553631700, + 0.038930581400, -0.032847149600, 0.028979555600, -0.025852313500}, + { -0.003456266600, 0.000475867100, 0.000822340300, 0.000303040600, + -0.003749535300, 0.011688497100, -0.031960441800, 0.122782327100, + 0.266496871700, -0.091106317700, 0.061319593900, -0.047633284300, + 0.039010819600, -0.032912027100, 0.029044984700, -0.025916736900}, + { -0.003505638300, 0.000531731300, 0.000760093900, 0.000367747300, + -0.003818881300, 0.011769073200, -0.032068754900, 0.123192522800, + 0.266191486300, -0.091168632300, 0.061398326100, -0.047717396700, + 0.039088687600, -0.032993298700, 0.029119110500, -0.025979190500}, + { -0.003550040600, 0.000594828700, 0.000692495800, 0.000430866400, + -0.003898279700, 0.011852192100, -0.032198029300, 0.123611901100, + 
0.265868569100, -0.091231578000, 0.061464903000, -0.047784155800, + 0.039161387400, -0.033061913700, 0.029187959100, -0.026053040700}, + { -0.003602065500, 0.000653931700, 0.000626385600, 0.000499981800, + -0.003972488800, 0.011937971200, -0.032311531400, 0.124029148500, + 0.265553269700, -0.091283444100, 0.061535070700, -0.047860778200, + 0.039232475500, -0.033136595000, 0.029249328200, -0.026115105800}, + { -0.003647327300, 0.000718027400, 0.000556866100, 0.000572266200, + -0.004049722900, 0.012020034000, -0.032439785200, 0.124448392000, + 0.265233959000, -0.091339033000, 0.061597170100, -0.047925510600, + 0.039309873100, -0.033199111900, 0.029320278800, -0.026173704500}, + { -0.003702105200, 0.000780101600, 0.000494443100, 0.000649052900, + -0.004123793300, 0.012117690200, -0.032570746200, 0.124873224300, + 0.264900163300, -0.091383468500, 0.061659588400, -0.047995635500, + 0.039375017600, -0.033269511800, 0.029378736300, -0.026232862100}, + { -0.003755895200, 0.000841447500, 0.000425856500, 0.000720904000, + -0.004201204700, 0.012206896600, -0.032687548300, 0.125295550200, + 0.264577145700, -0.091434512400, 0.061726016900, -0.048060026200, + 0.039436382500, -0.033329672000, 0.029438925700, -0.026300063600}, + { -0.003803350600, 0.000908270600, 0.000353021900, 0.000796996500, + -0.004282623500, 0.012293251100, -0.032819571200, 0.125720795200, + 0.264247652100, -0.091479732400, 0.061780340600, -0.048117964600, + 0.039508550400, -0.033394973500, 0.029502101900, -0.026353317400}, + { -0.003861457500, 0.000974841200, 0.000278697700, 0.000875358500, + -0.004367158600, 0.012390643600, -0.032952152900, 0.126156834400, + 0.263910245800, -0.091525806900, 0.061834912800, -0.048182001300, + 0.039568933000, -0.033460776600, 0.029556965600, -0.026409434000}, + { -0.003912507000, 0.001045891900, 0.000200514600, 0.000957623400, + -0.004455578400, 0.012491700600, -0.033087770300, 0.126588986000, + 0.263570751000, -0.091552786700, 0.061883959100, -0.048240646500, + 
0.039624070900, -0.033515051500, 0.029612627700, -0.026473220500}, + { -0.003970051000, 0.001111797200, 0.000126628900, 0.001035742900, + -0.004539881200, 0.012588312500, -0.033218503400, 0.127016546500, + 0.263234790300, -0.091590421200, 0.061932964300, -0.048301375000, + 0.039688914300, -0.033575106100, 0.029670790800, -0.026522150600}, + { -0.004024302800, 0.001186615500, 0.000044341700, 0.001122432500, + -0.004633631600, 0.012695304100, -0.033360614900, 0.127463754300, + 0.262888924200, -0.091617477500, 0.061978479200, -0.048340055700, + 0.039737596900, -0.033623583100, 0.029720893300, -0.026573902300}, + { -0.004085082700, 0.001256538900, -0.000034034800, 0.001205478700, + -0.004723290400, 0.012797985100, -0.033498033100, 0.127907136500, + 0.262539269000, -0.091650848600, 0.062022592400, -0.048395229300, + 0.039790688300, -0.033684178100, 0.029779644500, -0.026631089400}, + { -0.004139947900, 0.001331802200, -0.000110045000, 0.001297966400, + -0.004821497300, 0.012908660300, -0.033643014900, 0.128351507600, + 0.262181947100, -0.091667171300, 0.062052525700, -0.048439828000, + 0.039840829600, -0.033723634100, 0.029823200100, -0.026677369100}, + { -0.004204208100, 0.001405618600, -0.000193120700, 0.001386785000, + -0.004924853700, 0.013014623800, -0.033792097000, 0.128796851700, + 0.261832669700, -0.091689502800, 0.062095944700, -0.048484357900, + 0.039885777900, -0.033776306400, 0.029866954600, -0.026723942900}, + { -0.004260417800, 0.001483063100, -0.000278806000, 0.001477736900, + -0.005023257700, 0.013125955200, -0.033937289100, 0.129242600400, + 0.261471099700, -0.091701578900, 0.062122650400, -0.048518010200, + 0.039936894900, -0.033824115200, 0.029914914700, -0.026764892400}, + { -0.004327008900, 0.001552956500, -0.000370001900, 0.001573784600, + -0.005126787000, 0.013243154600, -0.034089428900, 0.129704208100, + 0.261097309800, -0.091709577400, 0.062145158100, -0.048554302300, + 0.039972655000, -0.033861618500, 0.029956756400, -0.026817551300}, + { 
-0.004387061400, 0.001634796200, -0.000460660800, 0.001670082700, + -0.005230956800, 0.013361670100, -0.034249709200, 0.130162737200, + 0.260730415900, -0.091714875900, 0.062174189600, -0.048586130300, + 0.040006010900, -0.033904910800, 0.029992822400, -0.026858101400}, + { -0.004454372600, 0.001712493000, -0.000548375700, 0.001763978200, + -0.005332669200, 0.013476831400, -0.034399268600, 0.130622438100, + 0.260362390800, -0.091718251200, 0.062194230500, -0.048621392100, + 0.040048640600, -0.033945864700, 0.030035043200, -0.026894698800}, + { -0.004523786800, 0.001792774700, -0.000639267000, 0.001868519600, + -0.005434268500, 0.013594098000, -0.034558763500, 0.131081652600, + 0.259993523100, -0.091727265200, 0.062216113000, -0.048648464400, + 0.040076906100, -0.033977399000, 0.030072123900, -0.026942508300}, + { -0.004586585500, 0.001877974400, -0.000734060100, 0.001970438800, + -0.005551787400, 0.013714416500, -0.034721796900, 0.131545261000, + 0.259617106900, -0.091722403400, 0.062229403200, -0.048678076000, + 0.040115213700, -0.034014280800, 0.030111284500, -0.026976072400}, + { -0.004659557800, 0.001962511100, -0.000829862900, 0.002073337300, + -0.005663538600, 0.013840666600, -0.034882878100, 0.132019240400, + 0.259230513500, -0.091706871400, 0.062239873700, -0.048693489400, + 0.040133308800, -0.034036485500, 0.030138847000, -0.027009514400}, + { -0.004725695500, 0.002051981700, -0.000929347100, 0.002180035700, + -0.005779960100, 0.013979073500, -0.035051605900, 0.132491612100, + 0.258836019600, -0.091701376100, 0.062241192800, -0.048705371900, + 0.040165553900, -0.034068442600, 0.030166932700, -0.027050861400}, + { -0.004799901300, 0.002138024300, -0.001026848500, 0.002284840400, + -0.005893861400, 0.014108040800, -0.035222237300, 0.132964854500, + 0.258446283900, -0.091682539100, 0.062249560500, -0.048718972600, + 0.040182871900, -0.034098388500, 0.030199249900, -0.027078741500}, + { -0.004870595400, 0.002232582100, -0.001132590800, 0.002399200400, + 
-0.006025214200, 0.014243272400, -0.035401209000, 0.133455517400, + 0.258048229900, -0.091653939600, 0.062241715900, -0.048729769800, + 0.040203792100, -0.034111888100, 0.030220920600, -0.027107457100}, + { -0.004947156000, 0.002321555700, -0.001233847400, 0.002515774700, + -0.006140611400, 0.014382400400, -0.035570772900, 0.133930500000, + 0.257654713000, -0.091637493400, 0.062242822500, -0.048738434100, + 0.040216442600, -0.034137799500, 0.030242570200, -0.027143371600}, + { -0.005016493400, 0.002414767800, -0.001338252600, 0.002627935400, + -0.006262314300, 0.014519076200, -0.035748887100, 0.134413490000, + 0.257250575700, -0.091604648700, 0.062239840100, -0.048743282000, + 0.040233396200, -0.034157300200, 0.030266800800, -0.027164826500}, + { -0.005099278600, 0.002511020600, -0.001447380500, 0.002746842300, + -0.006399204100, 0.014660767100, -0.035934332400, 0.134912928100, + 0.256840267600, -0.091570002800, 0.062216233800, -0.048739644700, + 0.040234336300, -0.034163561400, 0.030281865200, -0.027187650100}, + { -0.005170558100, 0.002606541400, -0.001554671000, 0.002862380100, + -0.006525103100, 0.014809292700, -0.036112846200, 0.135398740700, + 0.256430891500, -0.091531009000, 0.062208163600, -0.048740138000, + 0.040247773900, -0.034180802400, 0.030304366300, -0.027215455700}, + { -0.005254922300, 0.002704732600, -0.001666038900, 0.002983502200, + -0.006664576800, 0.014954266000, -0.036308554600, 0.135898611300, + 0.256016406700, -0.091492283600, 0.062189717900, -0.048730903800, + 0.040245240000, -0.034192418000, 0.030314184400, -0.027233909600}, + { -0.005330421900, 0.002805447300, -0.001779622200, 0.003113745000, + -0.006794821100, 0.015108636900, -0.036493663300, 0.136392728600, + 0.255595532100, -0.091449297700, 0.062167723200, -0.048720314000, + 0.040247715100, -0.034191966500, 0.030331001200, -0.027248039400}, + { -0.005418047900, 0.002907510500, -0.001896019300, 0.003240948000, + -0.006941296500, 0.015268001700, -0.036685011500, 0.136902218400, + 
0.255169264700, -0.091390047400, 0.062133585500, -0.048706450000, + 0.040240217600, -0.034199174200, 0.030336692900, -0.027272064200}, + { -0.005505373600, 0.003009395700, -0.002011695700, 0.003366480500, + -0.007078261400, 0.015421720100, -0.036888380700, 0.137411927200, + 0.254740700000, -0.091336957100, 0.062103176600, -0.048687487200, + 0.040235929700, -0.034199682900, 0.030337810300, -0.027283556600}, + { -0.005585382500, 0.003115541600, -0.002131832000, 0.003504827500, + -0.007224968800, 0.015582215100, -0.037080250800, 0.137915259700, + 0.254306086800, -0.091279630400, 0.062069083500, -0.048665013800, + 0.040221399100, -0.034201654500, 0.030346034000, -0.027291156400}, + { -0.005674994900, 0.003220173800, -0.002250865900, 0.003634318300, + -0.007366427500, 0.015748179900, -0.037284933600, 0.138428727000, + 0.253871405000, -0.091219627900, 0.062033003000, -0.048649242600, + 0.040210075200, -0.034189299100, 0.030346059500, -0.027308900300}, + { -0.005758700400, 0.003330548600, -0.002375530200, 0.003770118000, + -0.007521879300, 0.015909567200, -0.037496867100, 0.138949385400, + 0.253426570300, -0.091143115000, 0.061992180000, -0.048620522900, + 0.040197728000, -0.034182788700, 0.030348556000, -0.027312476200}, + { -0.005852674900, 0.003440460400, -0.002500886900, 0.003906772800, + -0.007671474500, 0.016084224600, -0.037702515700, 0.139476737000, + 0.252975093200, -0.091066456400, 0.061940861800, -0.048581744400, + 0.040168382200, -0.034171796400, 0.030338133100, -0.027314851900}, + { -0.005940171600, 0.003555441000, -0.002631162900, 0.004057015800, + -0.007831507500, 0.016259063300, -0.037917269600, 0.140002708200, + 0.252522799200, -0.090988640700, 0.061880613500, -0.048547165800, + 0.040149371300, -0.034151729900, 0.030339624000, -0.027324602000}, + { -0.006038006300, 0.003669989500, -0.002761828800, 0.004199685000, + -0.007987803500, 0.016441338600, -0.038138344000, 0.140535980000, + 0.252061541300, -0.090901636600, 0.061820252000, -0.048500250500, + 
0.040112744100, -0.034134470700, 0.030324310800, -0.027322408700}, + { -0.006128817400, 0.003788978800, -0.002896910300, 0.004355118400, + -0.008153648900, 0.016622514100, -0.038359180300, 0.141069989900, + 0.251597538600, -0.090812420800, 0.061758888600, -0.048454321900, + 0.040084569400, -0.034113893000, 0.030306458400, -0.027317970100}, + { -0.006227186400, 0.003904318200, -0.003028595200, 0.004499540800, + -0.008319159800, 0.016794826900, -0.038581990000, 0.141605434000, + 0.251131429100, -0.090720594500, 0.061694722300, -0.048405249700, + 0.040054114300, -0.034091543000, 0.030295722900, -0.027310222200}, + { -0.006328397300, 0.004031158500, -0.003168595500, 0.004641928300, + -0.008476386900, 0.016978639900, -0.038804959500, 0.142142757000, + 0.250662990700, -0.090633704900, 0.061624954700, -0.048358207200, + 0.040007213300, -0.034057656100, 0.030276666600, -0.027311829700}, + { -0.006423738600, 0.004155528900, -0.003309959300, 0.004804548400, + -0.008650250700, 0.017168425100, -0.039034846100, 0.142695621300, + 0.250186846500, -0.090531292600, 0.061551769900, -0.048301646800, + 0.039969736100, -0.034029532000, 0.030260492100, -0.027299867000}, + { -0.006528665100, 0.004278722600, -0.003450970500, 0.004959568700, + -0.008827892900, 0.017361477800, -0.039267480800, 0.143244619700, + 0.249702348700, -0.090420060400, 0.061470769900, -0.048237360200, + 0.039925613200, -0.033995062800, 0.030230693900, -0.027285607600}, + { -0.006624732800, 0.004404173000, -0.003593946300, 0.005123920600, + -0.009003806900, 0.017553760300, -0.039499198000, 0.143792799000, + 0.249217705300, -0.090309454700, 0.061398382500, -0.048170611400, + 0.039872247300, -0.033963765200, 0.030211452200, -0.027278748100}, + { -0.006733019100, 0.004530986400, -0.003739016800, 0.005283093400, + -0.009178450500, 0.017763535800, -0.039744969800, 0.144353453000, + 0.248736735600, -0.090189496400, 0.061311021700, -0.048108842000, + 0.039820545400, -0.033924139600, 0.030177475500, -0.027261029400}, + { 
-0.006833632200, 0.004661933300, -0.003888052900, 0.005454760700, + -0.009362610000, 0.017964244700, -0.039985659800, 0.144912324100, + 0.248238004700, -0.090072272700, 0.061214521700, -0.048032095800, + 0.039764687300, -0.033871892800, 0.030150552400, -0.027238906300}, + { -0.006944808900, 0.004792587900, -0.004037760400, 0.005619673100, + -0.009551266900, 0.018169204500, -0.040230608100, 0.145484672100, + 0.247735896000, -0.089941342900, 0.061116139800, -0.047952151500, + 0.039706664600, -0.033825489900, 0.030111200300, -0.027225498800}, + { -0.007048709900, 0.004927883500, -0.004199654100, 0.005793881200, + -0.009738964200, 0.018374848700, -0.040483849800, 0.146046598800, + 0.247230365400, -0.089816950200, 0.061014854400, -0.047877574700, + 0.039635751800, -0.033778171100, 0.030078686000, -0.027198514200}, + { -0.007165536600, 0.005065305500, -0.004357110300, 0.005967529800, + -0.009937335800, 0.018590642800, -0.040747877600, 0.146628884600, + 0.246713790300, -0.089671417900, 0.060903432100, -0.047785728000, + 0.039566909200, -0.033722116600, 0.030030861700, -0.027169252500}, + { -0.007276776700, 0.005196186900, -0.004507476500, 0.006140611400, + -0.010115702100, 0.018796926000, -0.040992574000, 0.147194377100, + 0.246211745800, -0.089538068000, 0.060802547900, -0.047694806200, + 0.039498877900, -0.033667952600, 0.029984769900, -0.027149571000}, + { -0.007387874000, 0.005339424900, -0.004670815200, 0.006320666200, + -0.010320954700, 0.019027766500, -0.041261046500, 0.147782948500, + 0.245685443800, -0.089383414100, 0.060683139900, -0.047604443400, + 0.039421896500, -0.033597878400, 0.029942169700, -0.027114750800}, + { -0.007510337800, 0.005483782700, -0.004836420600, 0.006511584900, + -0.010526788500, 0.019252707100, -0.041534516600, 0.148377088500, + 0.245152841500, -0.089229128400, 0.060553978800, -0.047496635400, + 0.039330446700, -0.033531679300, 0.029885352000, -0.027077485500}, + { -0.007617681800, 0.005622426600, -0.004994838300, 0.006686063700, + 
-0.010725673400, 0.019468467400, -0.041789645200, 0.148963171000, + 0.244635866200, -0.089072585900, 0.060442764900, -0.047403936100, + 0.039252529900, -0.033469077500, 0.029839930100, -0.027050084400}, + { -0.007743458100, 0.005771106400, -0.005173633200, 0.006879940500, + -0.010936112200, 0.019706338200, -0.042065049900, 0.149561525700, + 0.244095869900, -0.088910561500, 0.060307776300, -0.047291253700, + 0.039164455700, -0.033396349900, 0.029777209800, -0.027007624400}, + { -0.007858967200, 0.005919845900, -0.005343167000, 0.007067136200, + -0.011149489200, 0.019938097200, -0.042344891600, 0.150163766200, + 0.243550330900, -0.088743758000, 0.060167864400, -0.047174313100, + 0.039073486100, -0.033321568100, 0.029721987600, -0.026962754000}, + { -0.007983157300, 0.006074179500, -0.005508592100, 0.007258796500, + -0.011356666000, 0.020172218500, -0.042616124600, 0.150758631000, + 0.243020071900, -0.088580561000, 0.060040729000, -0.047067967200, + 0.038982593900, -0.033247283000, 0.029658101500, -0.026927788900}, + { -0.008105130300, 0.006230560200, -0.005687757500, 0.007464989300, + -0.011579317500, 0.020414981800, -0.042907551700, 0.151374476700, + 0.242457316000, -0.088387814400, 0.059887026200, -0.046937384600, + 0.038870631000, -0.033163420100, 0.029594120700, -0.026875984700}, + { -0.008233810500, 0.006382559600, -0.005862421100, 0.007658087800, + -0.011799401200, 0.020662084900, -0.043192258600, 0.151985072700, + 0.241899935800, -0.088207332900, 0.059743644400, -0.046816052000, + 0.038766487900, -0.033069335600, 0.029522772700, -0.026834459500}, + { -0.008365745600, 0.006538532000, -0.006049746700, 0.007862043900, + -0.012020887000, 0.020904026300, -0.043490655700, 0.152598080900, + 0.241345587100, -0.088019183800, 0.059585807800, -0.046682659300, + 0.038659818300, -0.032980362900, 0.029455506100, -0.026779041200}, + { -0.008488941000, 0.006696553400, -0.006230594800, 0.008061850300, + -0.012248204100, 0.021158865900, -0.043782741400, 0.153217488200, + 
0.240775110200, -0.087826286000, 0.059432200300, -0.046551952900, + 0.038547619600, -0.032887333800, 0.029375054800, -0.026721976200}, + { -0.008621850700, 0.006853393800, -0.006411265100, 0.008270409100, + -0.012474342700, 0.021413239200, -0.044074457000, 0.153836655200, + 0.240212813600, -0.087629196300, 0.059265982800, -0.046411553600, + 0.038435074500, -0.032793916400, 0.029303599100, -0.026672135800}, + { -0.008749942500, 0.007017378200, -0.006599218000, 0.008486986700, + -0.012716522600, 0.021665336500, -0.044383621100, 0.154463623000, + 0.239631193600, -0.087424514000, 0.059102015300, -0.046272077200, + 0.038314783100, -0.032692580400, 0.029216641900, -0.026609057300}, + { -0.008886221300, 0.007178451900, -0.006792714500, 0.008689620600, + -0.012948009400, 0.021925282500, -0.044681119300, 0.155098325200, + 0.239060508500, -0.087219020100, 0.058937803200, -0.046131179100, + 0.038193255500, -0.032591551900, 0.029137687900, -0.026544650500}, + { -0.009017719800, 0.007346605300, -0.006985204400, 0.008911224900, + -0.013188518400, 0.022195302800, -0.044997006400, 0.155734052300, + 0.238467918900, -0.087010194700, 0.058751361600, -0.045972814900, + 0.038064564500, -0.032483676700, 0.029046121000, -0.026486139900}, + { -0.009157733600, 0.007511909600, -0.007175647500, 0.009130702800, + -0.013426053000, 0.022454091700, -0.045304400400, 0.156379869500, + 0.237883048800, -0.086789838900, 0.058573065700, -0.045819242800, + 0.037922966700, -0.032373937700, 0.028959823600, -0.026415860100}, + { -0.009301621700, 0.007690545500, -0.007377781200, 0.009352140300, + -0.013667509600, 0.022725762800, -0.045622167700, 0.157018351900, + 0.237292774000, -0.086564748300, 0.058390364400, -0.045653170900, + 0.037787266600, -0.032260085200, 0.028870030600, -0.026341359100}, + { -0.009438250700, 0.007864516400, -0.007577424400, 0.009574249500, + -0.013927827700, 0.023004845600, -0.045939676500, 0.157677386600, + 0.236683031800, -0.086329249600, 0.058190028400, -0.045489825900, + 
0.037645625200, -0.032140124600, 0.028767035600, -0.026274692700}, + { -0.009580872000, 0.008033410300, -0.007771917200, 0.009798419700, + -0.014171124700, 0.023277932300, -0.046258378200, 0.158318052700, + 0.236087895800, -0.086106607600, 0.058000802500, -0.045327782600, + 0.037504763700, -0.032022055600, 0.028674126500, -0.026197380900}, + { -0.009721894700, 0.008212777900, -0.007978032900, 0.010035312500, + -0.014428158400, 0.023565684300, -0.046592644400, 0.158984686900, + 0.235466308600, -0.085859387000, 0.057798739800, -0.045153519300, + 0.037352645400, -0.031893498200, 0.028563276700, -0.026114821800}, + { -0.009871323400, 0.008389875100, -0.008190788700, 0.010268343100, + -0.014682289800, 0.023850713300, -0.046924112000, 0.159640235100, + 0.234853461000, -0.085609536100, 0.057593857200, -0.044967652300, + 0.037198907100, -0.031755365000, 0.028462385300, -0.026039689900}, + { -0.010014352200, 0.008571352800, -0.008398761300, 0.010499975700, + -0.014953382600, 0.024141775800, -0.047261891500, 0.160311566300, + 0.234232816700, -0.085360140300, 0.057372021900, -0.044785182900, + 0.037040075600, -0.031620187000, 0.028346554700, -0.025952967800}, + { -0.010166260400, 0.008751093900, -0.008606212500, 0.010738923200, + -0.015213032100, 0.024432461700, -0.047598975500, 0.160982608100, + 0.233611368900, -0.085101130000, 0.057159516400, -0.044601390800, + 0.036879378400, -0.031484316000, 0.028237949300, -0.025863505900}, + { -0.010321897600, 0.008936015200, -0.008827674600, 0.010981773200, + -0.015478042100, 0.024729320900, -0.047942312100, 0.161661569100, + 0.232972740300, -0.084843025700, 0.056938084300, -0.044400455400, + 0.036712214700, -0.031342439100, 0.028116156900, -0.025780065800}, + { -0.010466289100, 0.009127889900, -0.009036452200, 0.011223231400, + -0.015749030400, 0.025020862500, -0.048280259200, 0.162326124500, + 0.232344053400, -0.084586151300, 0.056717963000, -0.044210265700, + 0.036546321900, -0.031201495400, 0.028003222400, -0.025686033500}, + { 
-0.010624282400, 0.009314904500, -0.009252420500, 0.011471920900, + -0.016019505400, 0.025323118600, -0.048628982800, 0.163011748400, + 0.231703178600, -0.084306442100, 0.056487091900, -0.044009688500, + 0.036370261500, -0.031051508400, 0.027873625100, -0.025587973900}, + { -0.010770500300, 0.009500718600, -0.009474642200, 0.011715943000, + -0.016285463700, 0.025620788400, -0.048972602300, 0.163692118500, + 0.231066748100, -0.084039975100, 0.056259353500, -0.043812102400, + 0.036197201000, -0.030904859500, 0.027756477200, -0.025499892900}, + { -0.010931246800, 0.009691737700, -0.009694581800, 0.011960978000, + -0.016571687400, 0.025928666900, -0.049336013900, 0.164383311000, + 0.230408652300, -0.083761749100, 0.056019673100, -0.043603445300, + 0.036015066800, -0.030749714100, 0.027631903700, -0.025396035700}, + { -0.011083143100, 0.009884244000, -0.009924855700, 0.012214065900, + -0.016847826800, 0.026245793300, -0.049688721100, 0.165075323300, + 0.229757192500, -0.083479360500, 0.055777843800, -0.043392967400, + 0.035830313000, -0.030592744600, 0.027496482800, -0.025301408600}, + { -0.011247937000, 0.010079726100, -0.010150237400, 0.012473832200, + -0.017139617800, 0.026560090900, -0.050058620700, 0.165774369800, + 0.229096722700, -0.083187562800, 0.055525970000, -0.043164809800, + 0.035639110100, -0.030429438500, 0.027364856000, -0.025191900900}, + { -0.011414156900, 0.010285088600, -0.010375664800, 0.012733949200, + -0.017423150000, 0.026876050100, -0.050421461700, 0.166486654000, + 0.228431981400, -0.082882457600, 0.055272032600, -0.042943711600, + 0.035452683100, -0.030260595300, 0.027218999400, -0.025080573500}, + { -0.011569876800, 0.010482366800, -0.010611428200, 0.012993995900, + -0.017715404200, 0.027190997300, -0.050791534100, 0.167187177600, + 0.227767190600, -0.082585986900, 0.055017316200, -0.042721547900, + 0.035257818300, -0.030094081500, 0.027084885600, -0.024977875100}, + { -0.011735641200, 0.010679052100, -0.010838874100, 0.013255461800, + 
-0.018000408400, 0.027517442600, -0.051153628100, 0.167891670700, + 0.227096876100, -0.082283304900, 0.054765605000, -0.042492896400, + 0.035056918500, -0.029922928500, 0.026936217500, -0.024863147800}, + { -0.011895524600, 0.010881222400, -0.011080393900, 0.013522101400, + -0.018299976100, 0.027840232000, -0.051531646700, 0.168601698400, + 0.226418695900, -0.081973147300, 0.054498561600, -0.042259399400, + 0.034851636500, -0.029746913300, 0.026793421400, -0.024744283000}, + { -0.012061741600, 0.011078958400, -0.011308563500, 0.013785401700, + -0.018595879900, 0.028167737000, -0.051903321700, 0.169306244900, + 0.225746079600, -0.081675879800, 0.054233890700, -0.042028641800, + 0.034649470800, -0.029583320700, 0.026652632600, -0.024635779800}, + { -0.012233380000, 0.011282444600, -0.011543470000, 0.014055945900, + -0.018890880000, 0.028496294000, -0.052278354600, 0.170034645100, + 0.225057382600, -0.081353165500, 0.053955054200, -0.041784523800, + 0.034433929800, -0.029398158700, 0.026492726700, -0.024512416500}, + { -0.012397906100, 0.011498850800, -0.011789873100, 0.014328734600, + -0.019197522300, 0.028826801200, -0.052664294200, 0.170754821100, + 0.224364465600, -0.081027646800, 0.053673705700, -0.041537691400, + 0.034216344400, -0.029211010100, 0.026340154100, -0.024385306400}, + { -0.012569967700, 0.011703276500, -0.012026289200, 0.014600559700, + -0.019494308900, 0.029165749700, -0.053047725600, 0.171482978400, + 0.223673445000, -0.080701758400, 0.053401535600, -0.041291105000, + 0.034007728400, -0.029023858700, 0.026178649300, -0.024268830700}, + { -0.012733999200, 0.011910640700, -0.012273784000, 0.014874303900, + -0.019802025600, 0.029497290200, -0.053434105100, 0.172204980000, + 0.222975700000, -0.080378906900, 0.053114037000, -0.041039151900, + 0.033785715200, -0.028832942600, 0.026023212900, -0.024139101200}, + { -0.012909827000, 0.012119093100, -0.012514786800, 0.015152338700, + -0.020114351000, 0.029842400300, -0.053824260500, 0.172940887300, + 
0.222282695500, -0.080040051200, 0.052820808400, -0.040782083200, + 0.033559250100, -0.028638659800, 0.025864605000, -0.024016696400}, + { -0.013077694700, 0.012330804800, -0.012768208900, 0.015431882400, + -0.020419854900, 0.030191368700, -0.054217452300, 0.173672040800, + 0.221572261900, -0.079694814600, 0.052531679400, -0.040518864300, + 0.033326721800, -0.028439265300, 0.025691724900, -0.023881218600}, + { -0.013256584600, 0.012552301800, -0.013011940600, 0.015713408100, + -0.020736060900, 0.030531916300, -0.054613554100, 0.174415121200, + 0.220859647000, -0.079355588500, 0.052228216400, -0.040252363400, + 0.033091335800, -0.028236227500, 0.025525720700, -0.023743015900}, + { -0.013434190200, 0.012763748700, -0.013265795800, 0.015994241000, + -0.021052047100, 0.030880792200, -0.055006606000, 0.175147783700, + 0.220145965700, -0.079005398700, 0.051934897400, -0.039986031200, + 0.032865074900, -0.028032712000, 0.025349395400, -0.023613527100}, + { -0.013607326700, 0.012981847200, -0.013517060900, 0.016283677300, + -0.021377016200, 0.031239628300, -0.055419411200, 0.175899125500, + 0.219420925700, -0.078653073000, 0.051620280600, -0.039709589400, + 0.032621025300, -0.027822778600, 0.025177627600, -0.023470126900}, + { -0.013782589000, 0.013190197300, -0.013766570600, 0.016559610500, + -0.021678237600, 0.031575100900, -0.055808854800, 0.176637181300, + 0.218717601900, -0.078311440600, 0.051325946300, -0.039451087500, + 0.032383743600, -0.027618613700, 0.025010333400, -0.023331058700}, + { -0.013957302500, 0.013409708800, -0.014020295800, 0.016852102000, + -0.022006668200, 0.031937517200, -0.056216071400, 0.177395411400, + 0.217982956500, -0.077938975900, 0.051002551300, -0.039166330500, + 0.032131961500, -0.027401344200, 0.024822499400, -0.023192767000}, + { -0.014137826400, 0.013634160600, -0.014277189300, 0.017136805900, + -0.022327273100, 0.032291873400, -0.056623155700, 0.178133731400, + 0.217257004100, -0.077584995300, 0.050696728900, -0.038888338700, + 
0.031895218300, -0.027197775400, 0.024646690200, -0.023045282800}, + { -0.014322369000, 0.013853217500, -0.014530205800, 0.017428585500, + -0.022655062900, 0.032653627200, -0.057029286500, 0.178891911700, + 0.216528434500, -0.077216471400, 0.050368084700, -0.038598885400, + 0.031639043700, -0.026976449100, 0.024455251600, -0.022894485700}, + { -0.014497130900, 0.014073600400, -0.014793724500, 0.017720424400, + -0.022983229400, 0.033016325000, -0.057445269000, 0.179650074100, + 0.215789443000, -0.076847958800, 0.050048943000, -0.038308589100, + 0.031382426000, -0.026755494400, 0.024273618900, -0.022751911400}, + { -0.014682624600, 0.014293733300, -0.015048990300, 0.018023602800, + -0.023312864800, 0.033380176500, -0.057853443800, 0.180411228100, + 0.215054814200, -0.076463911600, 0.049724541400, -0.038014094400, + 0.031121419200, -0.026530109300, 0.024086993000, -0.022596809400}, + { -0.014859695200, 0.014525847700, -0.015314517000, 0.018318827500, + -0.023644693500, 0.033746221600, -0.058272616300, 0.181174383300, + 0.214307631900, -0.076086492000, 0.049388587800, -0.037727262100, + 0.030867454500, -0.026301452600, 0.023889634600, -0.022449489700}, + { -0.015043628800, 0.014744644300, -0.015567389200, 0.018609777000, + -0.023962891000, 0.034108731100, -0.058687999000, 0.181933910400, + 0.213571477500, -0.075708968100, 0.049061183200, -0.037429622300, + 0.030603552000, -0.026074188000, 0.023702400300, -0.022293365600}, + { -0.015230621700, 0.014967722500, -0.015834525700, 0.018906008200, + -0.024296112800, 0.034477126200, -0.059108931600, 0.182699826000, + 0.212818269000, -0.075324331700, 0.048728034800, -0.037126070200, + 0.030334649300, -0.025851070600, 0.023509696000, -0.022132339300}, + { -0.015403239200, 0.015184755300, -0.016085532200, 0.019196261600, + -0.024622369200, 0.034836693200, -0.059511496600, 0.183458281900, + 0.212080113800, -0.074943583300, 0.048389608700, -0.036837633800, + 0.030079543400, -0.025621473000, 0.023311149400, -0.021984907000}, + { 
-0.015590979000, 0.015408947400, -0.016353520500, 0.019493453600, + -0.024956657000, 0.035205975300, -0.059933451000, 0.184226621300, + 0.211321394300, -0.074561582900, 0.048049643100, -0.036528248900, + 0.029805219600, -0.025384306000, 0.023115206300, -0.021820726800}, + { -0.015774153200, 0.015647568900, -0.016617815600, 0.019807043400, + -0.025298080600, 0.035582464000, -0.060363020600, 0.185012756300, + 0.210561671700, -0.074159545200, 0.047700175200, -0.036210874300, + 0.029523208500, -0.025139945600, 0.022903521200, -0.021652637100}, + { -0.015957718800, 0.015866363900, -0.016880438900, 0.020098938000, + -0.025626490200, 0.035944852300, -0.060777269100, 0.185774583200, + 0.209816210000, -0.073770259000, 0.047363636300, -0.035914525800, + 0.029260937100, -0.024904284200, 0.022708784900, -0.021499355400}, + { -0.016146341100, 0.016091423300, -0.017150197200, 0.020398479100, + -0.025963577000, 0.036317429100, -0.061201713700, 0.186547463400, + 0.209047984700, -0.073368078300, 0.047014992300, -0.035596734200, + 0.028978835200, -0.024669258800, 0.022505698100, -0.021329645000}, + { -0.016320272300, 0.016310576200, -0.017403342300, 0.020690491600, + -0.026292446400, 0.036679979100, -0.061615606400, 0.187310565800, + 0.208297289900, -0.072981740600, 0.046672091800, -0.035285782400, + 0.028712202900, -0.024430026800, 0.022298988100, -0.021174596000}, + { -0.016517713700, 0.016555097200, -0.017684359000, 0.021012495700, + -0.026652137000, 0.037074921500, -0.062062849200, 0.188107078200, + 0.207521739000, -0.072562306900, 0.046307362400, -0.034963432800, + 0.028417277400, -0.024173926100, 0.022085993400, -0.020996166200}, + { -0.016693369200, 0.016775967300, -0.017940069300, 0.021307570300, + -0.026984503800, 0.037441286100, -0.062480664600, 0.188874906800, + 0.206762441600, -0.072157646700, 0.045956576300, -0.034644987500, + 0.028143706400, -0.023928413000, 0.021882650000, -0.020826571400}, + { -0.016880502600, 0.016999617900, -0.018208006800, 0.021605481800, + 
-0.027319373500, 0.037811009900, -0.062901579400, 0.189647520400, + 0.205988741400, -0.071756528600, 0.045600627200, -0.034330459400, + 0.027855743300, -0.023678399200, 0.021665948800, -0.020662712500}, + { -0.017068318000, 0.017223435000, -0.018475976000, 0.021903230800, + -0.027654693700, 0.038181666300, -0.063323023000, 0.190429725400, + 0.205222534800, -0.071353119600, 0.045242727900, -0.034004343600, + 0.027565877300, -0.023436396900, 0.021457189000, -0.020488542500}, + { -0.017246917900, 0.017457669100, -0.018736272400, 0.022213168000, + -0.027992598200, 0.038563100600, -0.063755422100, 0.191204356500, + 0.204452658100, -0.070936826400, 0.044881428000, -0.033686094400, + 0.027283488000, -0.023183090400, 0.021246949000, -0.020321895500}, + { -0.017437502400, 0.017684574800, -0.019007749200, 0.022514818900, + -0.028332071700, 0.038937772100, -0.064180906300, 0.191992723900, + 0.203676829800, -0.070522535200, 0.044513642200, -0.033351129700, + 0.026985379900, -0.022924089100, 0.021022365800, -0.020142555600}, + { -0.017612607500, 0.017905551400, -0.019263469700, 0.022810027000, + -0.028664228600, 0.039313071700, -0.064606182200, 0.192761249200, + 0.202909086600, -0.070116301600, 0.044153394600, -0.033033628600, + 0.026704131700, -0.022671934000, 0.020813349900, -0.019967342300}, + { -0.017804156200, 0.018142992400, -0.019536907100, 0.023124244000, + -0.029016668700, 0.039691471800, -0.065035304900, 0.193554029900, + 0.202126709100, -0.069685647800, 0.043779368600, -0.032692955500, + 0.026400789300, -0.022417171100, 0.020592971000, -0.019792949400}, + { -0.017992718700, 0.018368200800, -0.019806941800, 0.023424641100, + -0.029354657000, 0.040073376700, -0.065467078100, 0.194340183400, + 0.201348610200, -0.069267933600, 0.043409003500, -0.032365611400, + 0.026110304900, -0.022156493800, 0.020376369200, -0.019611597000}, + { -0.018169389900, 0.018590771100, -0.020064391100, 0.023730685800, + -0.029688570400, 0.040441389700, -0.065894130000, 0.195112977600, + 
0.200571593400, -0.068850504200, 0.043048431400, -0.032029425400, + 0.025810897300, -0.021896568900, 0.020150932200, -0.019430720600}, + { -0.018352128100, 0.018808952700, -0.020326930300, 0.024023078900, + -0.030018101100, 0.040814643600, -0.066306546900, 0.195890653300, + 0.199799462500, -0.068437128700, 0.042682739700, -0.031706143000, + 0.025524084500, -0.021648572900, 0.019936501000, -0.019261187000}, + { -0.018529692600, 0.019042047700, -0.020594876900, 0.024321965700, + -0.030363853100, 0.041184957000, -0.066735945800, 0.196675804800, + 0.199017015900, -0.068014184700, 0.042307739300, -0.031374606800, + 0.025230096300, -0.021383827800, 0.019716451300, -0.019076980100}, + { -0.018717857800, 0.019266724700, -0.020855153600, 0.024631079300, + -0.030701275000, 0.041566227500, -0.067166371900, 0.197464077100, + 0.198230805200, -0.067585453700, 0.041936966700, -0.031028914900, + 0.024921902400, -0.021116319300, 0.019483967100, -0.018899947900}, + { -0.018899947900, 0.019483967100, -0.021116319300, 0.024921902400, + -0.031028914900, 0.041936966700, -0.067585453700, 0.198230805200, + 0.197464077100, -0.067166371900, 0.041566227500, -0.030701275000, + 0.024631079300, -0.020855153600, 0.019266724700, -0.018717857800}, + { -0.019076980100, 0.019716451300, -0.021383827800, 0.025230096300, + -0.031374606800, 0.042307739300, -0.068014184700, 0.199017015900, + 0.196675804800, -0.066735945800, 0.041184957000, -0.030363853100, + 0.024321965700, -0.020594876900, 0.019042047700, -0.018529692600}, + { -0.019261187000, 0.019936501000, -0.021648572900, 0.025524084500, + -0.031706143000, 0.042682739700, -0.068437128700, 0.199799462500, + 0.195890653300, -0.066306546900, 0.040814643600, -0.030018101100, + 0.024023078900, -0.020326930300, 0.018808952700, -0.018352128100}, + { -0.019430720600, 0.020150932200, -0.021896568900, 0.025810897300, + -0.032029425400, 0.043048431400, -0.068850504200, 0.200571593400, + 0.195112977600, -0.065894130000, 0.040441389700, -0.029688570400, + 
0.023730685800, -0.020064391100, 0.018590771100, -0.018169389900}, + { -0.019611597000, 0.020376369200, -0.022156493800, 0.026110304900, + -0.032365611400, 0.043409003500, -0.069267933600, 0.201348610200, + 0.194340183400, -0.065467078100, 0.040073376700, -0.029354657000, + 0.023424641100, -0.019806941800, 0.018368200800, -0.017992718700}, + { -0.019792949400, 0.020592971000, -0.022417171100, 0.026400789300, + -0.032692955500, 0.043779368600, -0.069685647800, 0.202126709100, + 0.193554029900, -0.065035304900, 0.039691471800, -0.029016668700, + 0.023124244000, -0.019536907100, 0.018142992400, -0.017804156200}, + { -0.019967342300, 0.020813349900, -0.022671934000, 0.026704131700, + -0.033033628600, 0.044153394600, -0.070116301600, 0.202909086600, + 0.192761249200, -0.064606182200, 0.039313071700, -0.028664228600, + 0.022810027000, -0.019263469700, 0.017905551400, -0.017612607500}, + { -0.020142555600, 0.021022365800, -0.022924089100, 0.026985379900, + -0.033351129700, 0.044513642200, -0.070522535200, 0.203676829800, + 0.191992723900, -0.064180906300, 0.038937772100, -0.028332071700, + 0.022514818900, -0.019007749200, 0.017684574800, -0.017437502400}, + { -0.020321895500, 0.021246949000, -0.023183090400, 0.027283488000, + -0.033686094400, 0.044881428000, -0.070936826400, 0.204452658100, + 0.191204356500, -0.063755422100, 0.038563100600, -0.027992598200, + 0.022213168000, -0.018736272400, 0.017457669100, -0.017246917900}, + { -0.020488542500, 0.021457189000, -0.023436396900, 0.027565877300, + -0.034004343600, 0.045242727900, -0.071353119600, 0.205222534800, + 0.190429725400, -0.063323023000, 0.038181666300, -0.027654693700, + 0.021903230800, -0.018475976000, 0.017223435000, -0.017068318000}, + { -0.020662712500, 0.021665948800, -0.023678399200, 0.027855743300, + -0.034330459400, 0.045600627200, -0.071756528600, 0.205988741400, + 0.189647520400, -0.062901579400, 0.037811009900, -0.027319373500, + 0.021605481800, -0.018208006800, 0.016999617900, -0.016880502600}, + { 
-0.020826571400, 0.021882650000, -0.023928413000, 0.028143706400, + -0.034644987500, 0.045956576300, -0.072157646700, 0.206762441600, + 0.188874906800, -0.062480664600, 0.037441286100, -0.026984503800, + 0.021307570300, -0.017940069300, 0.016775967300, -0.016693369200}, + { -0.020996166200, 0.022085993400, -0.024173926100, 0.028417277400, + -0.034963432800, 0.046307362400, -0.072562306900, 0.207521739000, + 0.188107078200, -0.062062849200, 0.037074921500, -0.026652137000, + 0.021012495700, -0.017684359000, 0.016555097200, -0.016517713700}, + { -0.021174596000, 0.022298988100, -0.024430026800, 0.028712202900, + -0.035285782400, 0.046672091800, -0.072981740600, 0.208297289900, + 0.187310565800, -0.061615606400, 0.036679979100, -0.026292446400, + 0.020690491600, -0.017403342300, 0.016310576200, -0.016320272300}, + { -0.021329645000, 0.022505698100, -0.024669258800, 0.028978835200, + -0.035596734200, 0.047014992300, -0.073368078300, 0.209047984700, + 0.186547463400, -0.061201713700, 0.036317429100, -0.025963577000, + 0.020398479100, -0.017150197200, 0.016091423300, -0.016146341100}, + { -0.021499355400, 0.022708784900, -0.024904284200, 0.029260937100, + -0.035914525800, 0.047363636300, -0.073770259000, 0.209816210000, + 0.185774583200, -0.060777269100, 0.035944852300, -0.025626490200, + 0.020098938000, -0.016880438900, 0.015866363900, -0.015957718800}, + { -0.021652637100, 0.022903521200, -0.025139945600, 0.029523208500, + -0.036210874300, 0.047700175200, -0.074159545200, 0.210561671700, + 0.185012756300, -0.060363020600, 0.035582464000, -0.025298080600, + 0.019807043400, -0.016617815600, 0.015647568900, -0.015774153200}, + { -0.021820726800, 0.023115206300, -0.025384306000, 0.029805219600, + -0.036528248900, 0.048049643100, -0.074561582900, 0.211321394300, + 0.184226621300, -0.059933451000, 0.035205975300, -0.024956657000, + 0.019493453600, -0.016353520500, 0.015408947400, -0.015590979000}, + { -0.021984907000, 0.023311149400, -0.025621473000, 0.030079543400, + 
-0.036837633800, 0.048389608700, -0.074943583300, 0.212080113800, + 0.183458281900, -0.059511496600, 0.034836693200, -0.024622369200, + 0.019196261600, -0.016085532200, 0.015184755300, -0.015403239200}, + { -0.022132339300, 0.023509696000, -0.025851070600, 0.030334649300, + -0.037126070200, 0.048728034800, -0.075324331700, 0.212818269000, + 0.182699826000, -0.059108931600, 0.034477126200, -0.024296112800, + 0.018906008200, -0.015834525700, 0.014967722500, -0.015230621700}, + { -0.022293365600, 0.023702400300, -0.026074188000, 0.030603552000, + -0.037429622300, 0.049061183200, -0.075708968100, 0.213571477500, + 0.181933910400, -0.058687999000, 0.034108731100, -0.023962891000, + 0.018609777000, -0.015567389200, 0.014744644300, -0.015043628800}, + { -0.022449489700, 0.023889634600, -0.026301452600, 0.030867454500, + -0.037727262100, 0.049388587800, -0.076086492000, 0.214307631900, + 0.181174383300, -0.058272616300, 0.033746221600, -0.023644693500, + 0.018318827500, -0.015314517000, 0.014525847700, -0.014859695200}, + { -0.022596809400, 0.024086993000, -0.026530109300, 0.031121419200, + -0.038014094400, 0.049724541400, -0.076463911600, 0.215054814200, + 0.180411228100, -0.057853443800, 0.033380176500, -0.023312864800, + 0.018023602800, -0.015048990300, 0.014293733300, -0.014682624600}, + { -0.022751911400, 0.024273618900, -0.026755494400, 0.031382426000, + -0.038308589100, 0.050048943000, -0.076847958800, 0.215789443000, + 0.179650074100, -0.057445269000, 0.033016325000, -0.022983229400, + 0.017720424400, -0.014793724500, 0.014073600400, -0.014497130900}, + { -0.022894485700, 0.024455251600, -0.026976449100, 0.031639043700, + -0.038598885400, 0.050368084700, -0.077216471400, 0.216528434500, + 0.178891911700, -0.057029286500, 0.032653627200, -0.022655062900, + 0.017428585500, -0.014530205800, 0.013853217500, -0.014322369000}, + { -0.023045282800, 0.024646690200, -0.027197775400, 0.031895218300, + -0.038888338700, 0.050696728900, -0.077584995300, 0.217257004100, + 
0.178133731400, -0.056623155700, 0.032291873400, -0.022327273100, + 0.017136805900, -0.014277189300, 0.013634160600, -0.014137826400}, + { -0.023192767000, 0.024822499400, -0.027401344200, 0.032131961500, + -0.039166330500, 0.051002551300, -0.077938975900, 0.217982956500, + 0.177395411400, -0.056216071400, 0.031937517200, -0.022006668200, + 0.016852102000, -0.014020295800, 0.013409708800, -0.013957302500}, + { -0.023331058700, 0.025010333400, -0.027618613700, 0.032383743600, + -0.039451087500, 0.051325946300, -0.078311440600, 0.218717601900, + 0.176637181300, -0.055808854800, 0.031575100900, -0.021678237600, + 0.016559610500, -0.013766570600, 0.013190197300, -0.013782589000}, + { -0.023470126900, 0.025177627600, -0.027822778600, 0.032621025300, + -0.039709589400, 0.051620280600, -0.078653073000, 0.219420925700, + 0.175899125500, -0.055419411200, 0.031239628300, -0.021377016200, + 0.016283677300, -0.013517060900, 0.012981847200, -0.013607326700}, + { -0.023613527100, 0.025349395400, -0.028032712000, 0.032865074900, + -0.039986031200, 0.051934897400, -0.079005398700, 0.220145965700, + 0.175147783700, -0.055006606000, 0.030880792200, -0.021052047100, + 0.015994241000, -0.013265795800, 0.012763748700, -0.013434190200}, + { -0.023743015900, 0.025525720700, -0.028236227500, 0.033091335800, + -0.040252363400, 0.052228216400, -0.079355588500, 0.220859647000, + 0.174415121200, -0.054613554100, 0.030531916300, -0.020736060900, + 0.015713408100, -0.013011940600, 0.012552301800, -0.013256584600}, + { -0.023881218600, 0.025691724900, -0.028439265300, 0.033326721800, + -0.040518864300, 0.052531679400, -0.079694814600, 0.221572261900, + 0.173672040800, -0.054217452300, 0.030191368700, -0.020419854900, + 0.015431882400, -0.012768208900, 0.012330804800, -0.013077694700}, + { -0.024016696400, 0.025864605000, -0.028638659800, 0.033559250100, + -0.040782083200, 0.052820808400, -0.080040051200, 0.222282695500, + 0.172940887300, -0.053824260500, 0.029842400300, -0.020114351000, + 
0.015152338700, -0.012514786800, 0.012119093100, -0.012909827000}, + { -0.024139101200, 0.026023212900, -0.028832942600, 0.033785715200, + -0.041039151900, 0.053114037000, -0.080378906900, 0.222975700000, + 0.172204980000, -0.053434105100, 0.029497290200, -0.019802025600, + 0.014874303900, -0.012273784000, 0.011910640700, -0.012733999200}, + { -0.024268830700, 0.026178649300, -0.029023858700, 0.034007728400, + -0.041291105000, 0.053401535600, -0.080701758400, 0.223673445000, + 0.171482978400, -0.053047725600, 0.029165749700, -0.019494308900, + 0.014600559700, -0.012026289200, 0.011703276500, -0.012569967700}, + { -0.024385306400, 0.026340154100, -0.029211010100, 0.034216344400, + -0.041537691400, 0.053673705700, -0.081027646800, 0.224364465600, + 0.170754821100, -0.052664294200, 0.028826801200, -0.019197522300, + 0.014328734600, -0.011789873100, 0.011498850800, -0.012397906100}, + { -0.024512416500, 0.026492726700, -0.029398158700, 0.034433929800, + -0.041784523800, 0.053955054200, -0.081353165500, 0.225057382600, + 0.170034645100, -0.052278354600, 0.028496294000, -0.018890880000, + 0.014055945900, -0.011543470000, 0.011282444600, -0.012233380000}, + { -0.024635779800, 0.026652632600, -0.029583320700, 0.034649470800, + -0.042028641800, 0.054233890700, -0.081675879800, 0.225746079600, + 0.169306244900, -0.051903321700, 0.028167737000, -0.018595879900, + 0.013785401700, -0.011308563500, 0.011078958400, -0.012061741600}, + { -0.024744283000, 0.026793421400, -0.029746913300, 0.034851636500, + -0.042259399400, 0.054498561600, -0.081973147300, 0.226418695900, + 0.168601698400, -0.051531646700, 0.027840232000, -0.018299976100, + 0.013522101400, -0.011080393900, 0.010881222400, -0.011895524600}, + { -0.024863147800, 0.026936217500, -0.029922928500, 0.035056918500, + -0.042492896400, 0.054765605000, -0.082283304900, 0.227096876100, + 0.167891670700, -0.051153628100, 0.027517442600, -0.018000408400, + 0.013255461800, -0.010838874100, 0.010679052100, -0.011735641200}, + { 
-0.024977875100, 0.027084885600, -0.030094081500, 0.035257818300, + -0.042721547900, 0.055017316200, -0.082585986900, 0.227767190600, + 0.167187177600, -0.050791534100, 0.027190997300, -0.017715404200, + 0.012993995900, -0.010611428200, 0.010482366800, -0.011569876800}, + { -0.025080573500, 0.027218999400, -0.030260595300, 0.035452683100, + -0.042943711600, 0.055272032600, -0.082882457600, 0.228431981400, + 0.166486654000, -0.050421461700, 0.026876050100, -0.017423150000, + 0.012733949200, -0.010375664800, 0.010285088600, -0.011414156900}, + { -0.025191900900, 0.027364856000, -0.030429438500, 0.035639110100, + -0.043164809800, 0.055525970000, -0.083187562800, 0.229096722700, + 0.165774369800, -0.050058620700, 0.026560090900, -0.017139617800, + 0.012473832200, -0.010150237400, 0.010079726100, -0.011247937000}, + { -0.025301408600, 0.027496482800, -0.030592744600, 0.035830313000, + -0.043392967400, 0.055777843800, -0.083479360500, 0.229757192500, + 0.165075323300, -0.049688721100, 0.026245793300, -0.016847826800, + 0.012214065900, -0.009924855700, 0.009884244000, -0.011083143100}, + { -0.025396035700, 0.027631903700, -0.030749714100, 0.036015066800, + -0.043603445300, 0.056019673100, -0.083761749100, 0.230408652300, + 0.164383311000, -0.049336013900, 0.025928666900, -0.016571687400, + 0.011960978000, -0.009694581800, 0.009691737700, -0.010931246800}, + { -0.025499892900, 0.027756477200, -0.030904859500, 0.036197201000, + -0.043812102400, 0.056259353500, -0.084039975100, 0.231066748100, + 0.163692118500, -0.048972602300, 0.025620788400, -0.016285463700, + 0.011715943000, -0.009474642200, 0.009500718600, -0.010770500300}, + { -0.025587973900, 0.027873625100, -0.031051508400, 0.036370261500, + -0.044009688500, 0.056487091900, -0.084306442100, 0.231703178600, + 0.163011748400, -0.048628982800, 0.025323118600, -0.016019505400, + 0.011471920900, -0.009252420500, 0.009314904500, -0.010624282400}, + { -0.025686033500, 0.028003222400, -0.031201495400, 0.036546321900, + 
-0.044210265700, 0.056717963000, -0.084586151300, 0.232344053400, + 0.162326124500, -0.048280259200, 0.025020862500, -0.015749030400, + 0.011223231400, -0.009036452200, 0.009127889900, -0.010466289100}, + { -0.025780065800, 0.028116156900, -0.031342439100, 0.036712214700, + -0.044400455400, 0.056938084300, -0.084843025700, 0.232972740300, + 0.161661569100, -0.047942312100, 0.024729320900, -0.015478042100, + 0.010981773200, -0.008827674600, 0.008936015200, -0.010321897600}, + { -0.025863505900, 0.028237949300, -0.031484316000, 0.036879378400, + -0.044601390800, 0.057159516400, -0.085101130000, 0.233611368900, + 0.160982608100, -0.047598975500, 0.024432461700, -0.015213032100, + 0.010738923200, -0.008606212500, 0.008751093900, -0.010166260400}, + { -0.025952967800, 0.028346554700, -0.031620187000, 0.037040075600, + -0.044785182900, 0.057372021900, -0.085360140300, 0.234232816700, + 0.160311566300, -0.047261891500, 0.024141775800, -0.014953382600, + 0.010499975700, -0.008398761300, 0.008571352800, -0.010014352200}, + { -0.026039689900, 0.028462385300, -0.031755365000, 0.037198907100, + -0.044967652300, 0.057593857200, -0.085609536100, 0.234853461000, + 0.159640235100, -0.046924112000, 0.023850713300, -0.014682289800, + 0.010268343100, -0.008190788700, 0.008389875100, -0.009871323400}, + { -0.026114821800, 0.028563276700, -0.031893498200, 0.037352645400, + -0.045153519300, 0.057798739800, -0.085859387000, 0.235466308600, + 0.158984686900, -0.046592644400, 0.023565684300, -0.014428158400, + 0.010035312500, -0.007978032900, 0.008212777900, -0.009721894700}, + { -0.026197380900, 0.028674126500, -0.032022055600, 0.037504763700, + -0.045327782600, 0.058000802500, -0.086106607600, 0.236087895800, + 0.158318052700, -0.046258378200, 0.023277932300, -0.014171124700, + 0.009798419700, -0.007771917200, 0.008033410300, -0.009580872000}, + { -0.026274692700, 0.028767035600, -0.032140124600, 0.037645625200, + -0.045489825900, 0.058190028400, -0.086329249600, 0.236683031800, + 
0.157677386600, -0.045939676500, 0.023004845600, -0.013927827700, + 0.009574249500, -0.007577424400, 0.007864516400, -0.009438250700}, + { -0.026341359100, 0.028870030600, -0.032260085200, 0.037787266600, + -0.045653170900, 0.058390364400, -0.086564748300, 0.237292774000, + 0.157018351900, -0.045622167700, 0.022725762800, -0.013667509600, + 0.009352140300, -0.007377781200, 0.007690545500, -0.009301621700}, + { -0.026415860100, 0.028959823600, -0.032373937700, 0.037922966700, + -0.045819242800, 0.058573065700, -0.086789838900, 0.237883048800, + 0.156379869500, -0.045304400400, 0.022454091700, -0.013426053000, + 0.009130702800, -0.007175647500, 0.007511909600, -0.009157733600}, + { -0.026486139900, 0.029046121000, -0.032483676700, 0.038064564500, + -0.045972814900, 0.058751361600, -0.087010194700, 0.238467918900, + 0.155734052300, -0.044997006400, 0.022195302800, -0.013188518400, + 0.008911224900, -0.006985204400, 0.007346605300, -0.009017719800}, + { -0.026544650500, 0.029137687900, -0.032591551900, 0.038193255500, + -0.046131179100, 0.058937803200, -0.087219020100, 0.239060508500, + 0.155098325200, -0.044681119300, 0.021925282500, -0.012948009400, + 0.008689620600, -0.006792714500, 0.007178451900, -0.008886221300}, + { -0.026609057300, 0.029216641900, -0.032692580400, 0.038314783100, + -0.046272077200, 0.059102015300, -0.087424514000, 0.239631193600, + 0.154463623000, -0.044383621100, 0.021665336500, -0.012716522600, + 0.008486986700, -0.006599218000, 0.007017378200, -0.008749942500}, + { -0.026672135800, 0.029303599100, -0.032793916400, 0.038435074500, + -0.046411553600, 0.059265982800, -0.087629196300, 0.240212813600, + 0.153836655200, -0.044074457000, 0.021413239200, -0.012474342700, + 0.008270409100, -0.006411265100, 0.006853393800, -0.008621850700}, + { -0.026721976200, 0.029375054800, -0.032887333800, 0.038547619600, + -0.046551952900, 0.059432200300, -0.087826286000, 0.240775110200, + 0.153217488200, -0.043782741400, 0.021158865900, -0.012248204100, + 
0.008061850300, -0.006230594800, 0.006696553400, -0.008488941000}, + { -0.026779041200, 0.029455506100, -0.032980362900, 0.038659818300, + -0.046682659300, 0.059585807800, -0.088019183800, 0.241345587100, + 0.152598080900, -0.043490655700, 0.020904026300, -0.012020887000, + 0.007862043900, -0.006049746700, 0.006538532000, -0.008365745600}, + { -0.026834459500, 0.029522772700, -0.033069335600, 0.038766487900, + -0.046816052000, 0.059743644400, -0.088207332900, 0.241899935800, + 0.151985072700, -0.043192258600, 0.020662084900, -0.011799401200, + 0.007658087800, -0.005862421100, 0.006382559600, -0.008233810500}, + { -0.026875984700, 0.029594120700, -0.033163420100, 0.038870631000, + -0.046937384600, 0.059887026200, -0.088387814400, 0.242457316000, + 0.151374476700, -0.042907551700, 0.020414981800, -0.011579317500, + 0.007464989300, -0.005687757500, 0.006230560200, -0.008105130300}, + { -0.026927788900, 0.029658101500, -0.033247283000, 0.038982593900, + -0.047067967200, 0.060040729000, -0.088580561000, 0.243020071900, + 0.150758631000, -0.042616124600, 0.020172218500, -0.011356666000, + 0.007258796500, -0.005508592100, 0.006074179500, -0.007983157300}, + { -0.026962754000, 0.029721987600, -0.033321568100, 0.039073486100, + -0.047174313100, 0.060167864400, -0.088743758000, 0.243550330900, + 0.150163766200, -0.042344891600, 0.019938097200, -0.011149489200, + 0.007067136200, -0.005343167000, 0.005919845900, -0.007858967200}, + { -0.027007624400, 0.029777209800, -0.033396349900, 0.039164455700, + -0.047291253700, 0.060307776300, -0.088910561500, 0.244095869900, + 0.149561525700, -0.042065049900, 0.019706338200, -0.010936112200, + 0.006879940500, -0.005173633200, 0.005771106400, -0.007743458100}, + { -0.027050084400, 0.029839930100, -0.033469077500, 0.039252529900, + -0.047403936100, 0.060442764900, -0.089072585900, 0.244635866200, + 0.148963171000, -0.041789645200, 0.019468467400, -0.010725673400, + 0.006686063700, -0.004994838300, 0.005622426600, -0.007617681800}, + { 
-0.027077485500, 0.029885352000, -0.033531679300, 0.039330446700, + -0.047496635400, 0.060553978800, -0.089229128400, 0.245152841500, + 0.148377088500, -0.041534516600, 0.019252707100, -0.010526788500, + 0.006511584900, -0.004836420600, 0.005483782700, -0.007510337800}, + { -0.027114750800, 0.029942169700, -0.033597878400, 0.039421896500, + -0.047604443400, 0.060683139900, -0.089383414100, 0.245685443800, + 0.147782948500, -0.041261046500, 0.019027766500, -0.010320954700, + 0.006320666200, -0.004670815200, 0.005339424900, -0.007387874000}, + { -0.027149571000, 0.029984769900, -0.033667952600, 0.039498877900, + -0.047694806200, 0.060802547900, -0.089538068000, 0.246211745800, + 0.147194377100, -0.040992574000, 0.018796926000, -0.010115702100, + 0.006140611400, -0.004507476500, 0.005196186900, -0.007276776700}, + { -0.027169252500, 0.030030861700, -0.033722116600, 0.039566909200, + -0.047785728000, 0.060903432100, -0.089671417900, 0.246713790300, + 0.146628884600, -0.040747877600, 0.018590642800, -0.009937335800, + 0.005967529800, -0.004357110300, 0.005065305500, -0.007165536600}, + { -0.027198514200, 0.030078686000, -0.033778171100, 0.039635751800, + -0.047877574700, 0.061014854400, -0.089816950200, 0.247230365400, + 0.146046598800, -0.040483849800, 0.018374848700, -0.009738964200, + 0.005793881200, -0.004199654100, 0.004927883500, -0.007048709900}, + { -0.027225498800, 0.030111200300, -0.033825489900, 0.039706664600, + -0.047952151500, 0.061116139800, -0.089941342900, 0.247735896000, + 0.145484672100, -0.040230608100, 0.018169204500, -0.009551266900, + 0.005619673100, -0.004037760400, 0.004792587900, -0.006944808900}, + { -0.027238906300, 0.030150552400, -0.033871892800, 0.039764687300, + -0.048032095800, 0.061214521700, -0.090072272700, 0.248238004700, + 0.144912324100, -0.039985659800, 0.017964244700, -0.009362610000, + 0.005454760700, -0.003888052900, 0.004661933300, -0.006833632200}, + { -0.027261029400, 0.030177475500, -0.033924139600, 0.039820545400, + 
-0.048108842000, 0.061311021700, -0.090189496400, 0.248736735600, + 0.144353453000, -0.039744969800, 0.017763535800, -0.009178450500, + 0.005283093400, -0.003739016800, 0.004530986400, -0.006733019100}, + { -0.027278748100, 0.030211452200, -0.033963765200, 0.039872247300, + -0.048170611400, 0.061398382500, -0.090309454700, 0.249217705300, + 0.143792799000, -0.039499198000, 0.017553760300, -0.009003806900, + 0.005123920600, -0.003593946300, 0.004404173000, -0.006624732800}, + { -0.027285607600, 0.030230693900, -0.033995062800, 0.039925613200, + -0.048237360200, 0.061470769900, -0.090420060400, 0.249702348700, + 0.143244619700, -0.039267480800, 0.017361477800, -0.008827892900, + 0.004959568700, -0.003450970500, 0.004278722600, -0.006528665100}, + { -0.027299867000, 0.030260492100, -0.034029532000, 0.039969736100, + -0.048301646800, 0.061551769900, -0.090531292600, 0.250186846500, + 0.142695621300, -0.039034846100, 0.017168425100, -0.008650250700, + 0.004804548400, -0.003309959300, 0.004155528900, -0.006423738600}, + { -0.027311829700, 0.030276666600, -0.034057656100, 0.040007213300, + -0.048358207200, 0.061624954700, -0.090633704900, 0.250662990700, + 0.142142757000, -0.038804959500, 0.016978639900, -0.008476386900, + 0.004641928300, -0.003168595500, 0.004031158500, -0.006328397300}, + { -0.027310222200, 0.030295722900, -0.034091543000, 0.040054114300, + -0.048405249700, 0.061694722300, -0.090720594500, 0.251131429100, + 0.141605434000, -0.038581990000, 0.016794826900, -0.008319159800, + 0.004499540800, -0.003028595200, 0.003904318200, -0.006227186400}, + { -0.027317970100, 0.030306458400, -0.034113893000, 0.040084569400, + -0.048454321900, 0.061758888600, -0.090812420800, 0.251597538600, + 0.141069989900, -0.038359180300, 0.016622514100, -0.008153648900, + 0.004355118400, -0.002896910300, 0.003788978800, -0.006128817400}, + { -0.027322408700, 0.030324310800, -0.034134470700, 0.040112744100, + -0.048500250500, 0.061820252000, -0.090901636600, 0.252061541300, + 
0.140535980000, -0.038138344000, 0.016441338600, -0.007987803500, + 0.004199685000, -0.002761828800, 0.003669989500, -0.006038006300}, + { -0.027324602000, 0.030339624000, -0.034151729900, 0.040149371300, + -0.048547165800, 0.061880613500, -0.090988640700, 0.252522799200, + 0.140002708200, -0.037917269600, 0.016259063300, -0.007831507500, + 0.004057015800, -0.002631162900, 0.003555441000, -0.005940171600}, + { -0.027314851900, 0.030338133100, -0.034171796400, 0.040168382200, + -0.048581744400, 0.061940861800, -0.091066456400, 0.252975093200, + 0.139476737000, -0.037702515700, 0.016084224600, -0.007671474500, + 0.003906772800, -0.002500886900, 0.003440460400, -0.005852674900}, + { -0.027312476200, 0.030348556000, -0.034182788700, 0.040197728000, + -0.048620522900, 0.061992180000, -0.091143115000, 0.253426570300, + 0.138949385400, -0.037496867100, 0.015909567200, -0.007521879300, + 0.003770118000, -0.002375530200, 0.003330548600, -0.005758700400}, + { -0.027308900300, 0.030346059500, -0.034189299100, 0.040210075200, + -0.048649242600, 0.062033003000, -0.091219627900, 0.253871405000, + 0.138428727000, -0.037284933600, 0.015748179900, -0.007366427500, + 0.003634318300, -0.002250865900, 0.003220173800, -0.005674994900}, + { -0.027291156400, 0.030346034000, -0.034201654500, 0.040221399100, + -0.048665013800, 0.062069083500, -0.091279630400, 0.254306086800, + 0.137915259700, -0.037080250800, 0.015582215100, -0.007224968800, + 0.003504827500, -0.002131832000, 0.003115541600, -0.005585382500}, + { -0.027283556600, 0.030337810300, -0.034199682900, 0.040235929700, + -0.048687487200, 0.062103176600, -0.091336957100, 0.254740700000, + 0.137411927200, -0.036888380700, 0.015421720100, -0.007078261400, + 0.003366480500, -0.002011695700, 0.003009395700, -0.005505373600}, + { -0.027272064200, 0.030336692900, -0.034199174200, 0.040240217600, + -0.048706450000, 0.062133585500, -0.091390047400, 0.255169264700, + 0.136902218400, -0.036685011500, 0.015268001700, -0.006941296500, + 
0.003240948000, -0.001896019300, 0.002907510500, -0.005418047900}, + { -0.027248039400, 0.030331001200, -0.034191966500, 0.040247715100, + -0.048720314000, 0.062167723200, -0.091449297700, 0.255595532100, + 0.136392728600, -0.036493663300, 0.015108636900, -0.006794821100, + 0.003113745000, -0.001779622200, 0.002805447300, -0.005330421900}, + { -0.027233909600, 0.030314184400, -0.034192418000, 0.040245240000, + -0.048730903800, 0.062189717900, -0.091492283600, 0.256016406700, + 0.135898611300, -0.036308554600, 0.014954266000, -0.006664576800, + 0.002983502200, -0.001666038900, 0.002704732600, -0.005254922300}, + { -0.027215455700, 0.030304366300, -0.034180802400, 0.040247773900, + -0.048740138000, 0.062208163600, -0.091531009000, 0.256430891500, + 0.135398740700, -0.036112846200, 0.014809292700, -0.006525103100, + 0.002862380100, -0.001554671000, 0.002606541400, -0.005170558100}, + { -0.027187650100, 0.030281865200, -0.034163561400, 0.040234336300, + -0.048739644700, 0.062216233800, -0.091570002800, 0.256840267600, + 0.134912928100, -0.035934332400, 0.014660767100, -0.006399204100, + 0.002746842300, -0.001447380500, 0.002511020600, -0.005099278600}, + { -0.027164826500, 0.030266800800, -0.034157300200, 0.040233396200, + -0.048743282000, 0.062239840100, -0.091604648700, 0.257250575700, + 0.134413490000, -0.035748887100, 0.014519076200, -0.006262314300, + 0.002627935400, -0.001338252600, 0.002414767800, -0.005016493400}, + { -0.027143371600, 0.030242570200, -0.034137799500, 0.040216442600, + -0.048738434100, 0.062242822500, -0.091637493400, 0.257654713000, + 0.133930500000, -0.035570772900, 0.014382400400, -0.006140611400, + 0.002515774700, -0.001233847400, 0.002321555700, -0.004947156000}, + { -0.027107457100, 0.030220920600, -0.034111888100, 0.040203792100, + -0.048729769800, 0.062241715900, -0.091653939600, 0.258048229900, + 0.133455517400, -0.035401209000, 0.014243272400, -0.006025214200, + 0.002399200400, -0.001132590800, 0.002232582100, -0.004870595400}, + { 
-0.027078741500, 0.030199249900, -0.034098388500, 0.040182871900, + -0.048718972600, 0.062249560500, -0.091682539100, 0.258446283900, + 0.132964854500, -0.035222237300, 0.014108040800, -0.005893861400, + 0.002284840400, -0.001026848500, 0.002138024300, -0.004799901300}, + { -0.027050861400, 0.030166932700, -0.034068442600, 0.040165553900, + -0.048705371900, 0.062241192800, -0.091701376100, 0.258836019600, + 0.132491612100, -0.035051605900, 0.013979073500, -0.005779960100, + 0.002180035700, -0.000929347100, 0.002051981700, -0.004725695500}, + { -0.027009514400, 0.030138847000, -0.034036485500, 0.040133308800, + -0.048693489400, 0.062239873700, -0.091706871400, 0.259230513500, + 0.132019240400, -0.034882878100, 0.013840666600, -0.005663538600, + 0.002073337300, -0.000829862900, 0.001962511100, -0.004659557800}, + { -0.026976072400, 0.030111284500, -0.034014280800, 0.040115213700, + -0.048678076000, 0.062229403200, -0.091722403400, 0.259617106900, + 0.131545261000, -0.034721796900, 0.013714416500, -0.005551787400, + 0.001970438800, -0.000734060100, 0.001877974400, -0.004586585500}, + { -0.026942508300, 0.030072123900, -0.033977399000, 0.040076906100, + -0.048648464400, 0.062216113000, -0.091727265200, 0.259993523100, + 0.131081652600, -0.034558763500, 0.013594098000, -0.005434268500, + 0.001868519600, -0.000639267000, 0.001792774700, -0.004523786800}, + { -0.026894698800, 0.030035043200, -0.033945864700, 0.040048640600, + -0.048621392100, 0.062194230500, -0.091718251200, 0.260362390800, + 0.130622438100, -0.034399268600, 0.013476831400, -0.005332669200, + 0.001763978200, -0.000548375700, 0.001712493000, -0.004454372600}, + { -0.026858101400, 0.029992822400, -0.033904910800, 0.040006010900, + -0.048586130300, 0.062174189600, -0.091714875900, 0.260730415900, + 0.130162737200, -0.034249709200, 0.013361670100, -0.005230956800, + 0.001670082700, -0.000460660800, 0.001634796200, -0.004387061400}, + { -0.026817551300, 0.029956756400, -0.033861618500, 0.039972655000, + 
-0.048554302300, 0.062145158100, -0.091709577400, 0.261097309800, + 0.129704208100, -0.034089428900, 0.013243154600, -0.005126787000, + 0.001573784600, -0.000370001900, 0.001552956500, -0.004327008900}, + { -0.026764892400, 0.029914914700, -0.033824115200, 0.039936894900, + -0.048518010200, 0.062122650400, -0.091701578900, 0.261471099700, + 0.129242600400, -0.033937289100, 0.013125955200, -0.005023257700, + 0.001477736900, -0.000278806000, 0.001483063100, -0.004260417800}, + { -0.026723942900, 0.029866954600, -0.033776306400, 0.039885777900, + -0.048484357900, 0.062095944700, -0.091689502800, 0.261832669700, + 0.128796851700, -0.033792097000, 0.013014623800, -0.004924853700, + 0.001386785000, -0.000193120700, 0.001405618600, -0.004204208100}, + { -0.026677369100, 0.029823200100, -0.033723634100, 0.039840829600, + -0.048439828000, 0.062052525700, -0.091667171300, 0.262181947100, + 0.128351507600, -0.033643014900, 0.012908660300, -0.004821497300, + 0.001297966400, -0.000110045000, 0.001331802200, -0.004139947900}, + { -0.026631089400, 0.029779644500, -0.033684178100, 0.039790688300, + -0.048395229300, 0.062022592400, -0.091650848600, 0.262539269000, + 0.127907136500, -0.033498033100, 0.012797985100, -0.004723290400, + 0.001205478700, -0.000034034800, 0.001256538900, -0.004085082700}, + { -0.026573902300, 0.029720893300, -0.033623583100, 0.039737596900, + -0.048340055700, 0.061978479200, -0.091617477500, 0.262888924200, + 0.127463754300, -0.033360614900, 0.012695304100, -0.004633631600, + 0.001122432500, 0.000044341700, 0.001186615500, -0.004024302800}, + { -0.026522150600, 0.029670790800, -0.033575106100, 0.039688914300, + -0.048301375000, 0.061932964300, -0.091590421200, 0.263234790300, + 0.127016546500, -0.033218503400, 0.012588312500, -0.004539881200, + 0.001035742900, 0.000126628900, 0.001111797200, -0.003970051000}, + { -0.026473220500, 0.029612627700, -0.033515051500, 0.039624070900, + -0.048240646500, 0.061883959100, -0.091552786700, 0.263570751000, + 
0.126588986000, -0.033087770300, 0.012491700600, -0.004455578400, + 0.000957623400, 0.000200514600, 0.001045891900, -0.003912507000}, + { -0.026409434000, 0.029556965600, -0.033460776600, 0.039568933000, + -0.048182001300, 0.061834912800, -0.091525806900, 0.263910245800, + 0.126156834400, -0.032952152900, 0.012390643600, -0.004367158600, + 0.000875358500, 0.000278697700, 0.000974841200, -0.003861457500}, + { -0.026353317400, 0.029502101900, -0.033394973500, 0.039508550400, + -0.048117964600, 0.061780340600, -0.091479732400, 0.264247652100, + 0.125720795200, -0.032819571200, 0.012293251100, -0.004282623500, + 0.000796996500, 0.000353021900, 0.000908270600, -0.003803350600}, + { -0.026300063600, 0.029438925700, -0.033329672000, 0.039436382500, + -0.048060026200, 0.061726016900, -0.091434512400, 0.264577145700, + 0.125295550200, -0.032687548300, 0.012206896600, -0.004201204700, + 0.000720904000, 0.000425856500, 0.000841447500, -0.003755895200}, + { -0.026232862100, 0.029378736300, -0.033269511800, 0.039375017600, + -0.047995635500, 0.061659588400, -0.091383468500, 0.264900163300, + 0.124873224300, -0.032570746200, 0.012117690200, -0.004123793300, + 0.000649052900, 0.000494443100, 0.000780101600, -0.003702105200}, + { -0.026173704500, 0.029320278800, -0.033199111900, 0.039309873100, + -0.047925510600, 0.061597170100, -0.091339033000, 0.265233959000, + 0.124448392000, -0.032439785200, 0.012020034000, -0.004049722900, + 0.000572266200, 0.000556866100, 0.000718027400, -0.003647327300}, + { -0.026115105800, 0.029249328200, -0.033136595000, 0.039232475500, + -0.047860778200, 0.061535070700, -0.091283444100, 0.265553269700, + 0.124029148500, -0.032311531400, 0.011937971200, -0.003972488800, + 0.000499981800, 0.000626385600, 0.000653931700, -0.003602065500}, + { -0.026053040700, 0.029187959100, -0.033061913700, 0.039161387400, + -0.047784155800, 0.061464903000, -0.091231578000, 0.265868569100, + 0.123611901100, -0.032198029300, 0.011852192100, -0.003898279700, + 
0.000430866400, 0.000692495800, 0.000594828700, -0.003550040600}, + { -0.025979190500, 0.029119110500, -0.032993298700, 0.039088687600, + -0.047717396700, 0.061398326100, -0.091168632300, 0.266191486300, + 0.123192522800, -0.032068754900, 0.011769073200, -0.003818881300, + 0.000367747300, 0.000760093900, 0.000531731300, -0.003505638300}, + { -0.025916736900, 0.029044984700, -0.032912027100, 0.039010819600, + -0.047633284300, 0.061319593900, -0.091106317700, 0.266496871700, + 0.122782327100, -0.031960441800, 0.011688497100, -0.003749535300, + 0.000303040600, 0.000822340300, 0.000475867100, -0.003456266600}, + { -0.025852313500, 0.028979555600, -0.032847149600, 0.038930581400, + -0.047553631700, 0.061249320900, -0.091041565400, 0.266807297400, + 0.122364892800, -0.031842624600, 0.011609993400, -0.003674451700, + 0.000230091300, 0.000883119400, 0.000424433200, -0.003415490600}, + { -0.025775412400, 0.028907556300, -0.032761339500, 0.038846825400, + -0.047473566900, 0.061166435400, -0.090972059800, 0.267117532200, + 0.121966313400, -0.031726469200, 0.011526657700, -0.003614337200, + 0.000167921300, 0.000943784400, 0.000369742900, -0.003367277200}, + { -0.025709333200, 0.028827467700, -0.032686245400, 0.038767569400, + -0.047387160100, 0.061086213100, -0.090894622700, 0.267415888400, + 0.121558234900, -0.031616336600, 0.011456238300, -0.003548028100, + 0.000105206400, 0.001004973300, 0.000312623600, -0.003327797000}, + { -0.025638962100, 0.028755960800, -0.032600073600, 0.038681603800, + -0.047304659400, 0.061000604600, -0.090822485700, 0.267711413800, + 0.121152150800, -0.031508074100, 0.011387738000, -0.003483800400, + 0.000044884500, 0.001063521800, 0.000259812600, -0.003280931700}, + { -0.025558748500, 0.028679119400, -0.032520166800, 0.038596766500, + -0.047212175500, 0.060911299100, -0.090745034900, 0.268015321100, + 0.120754514200, -0.031402432600, 0.011309574900, -0.003414828400, + -0.000008679700, 0.001122626500, 0.000204180800, -0.003242762700}, + { 
-0.025491004800, 0.028598264500, -0.032433214400, 0.038499397300, + -0.047124517500, 0.060822958600, -0.090656855500, 0.268302409700, + 0.120353034900, -0.031296168100, 0.011241308700, -0.003361438800, + -0.000066919400, 0.001168676900, 0.000156014300, -0.003199268400}, + { -0.025418085200, 0.028521763200, -0.032353226800, 0.038413339800, + -0.047029530200, 0.060731151800, -0.090577351800, 0.268593285800, + 0.119959312000, -0.031191167100, 0.011175918200, -0.003299906800, + -0.000125395300, 0.001226211900, 0.000102202900, -0.003162561900}, + { -0.025341820300, 0.028442350400, -0.032256880200, 0.038315834400, + -0.046933803100, 0.060629970500, -0.090484913100, 0.268880111900, + 0.119563660200, -0.031092550300, 0.011105920600, -0.003240720400, + -0.000180668100, 0.001280388200, 0.000052969800, -0.003118681600}, + { -0.025257897700, 0.028361984300, -0.032173379200, 0.038225851900, + -0.046832398400, 0.060542535700, -0.090401757000, 0.269167050700, + 0.119168850700, -0.030996156800, 0.011040895200, -0.003190341200, + -0.000235221900, 0.001334856800, 0.000001652200, -0.003084097600}, + { -0.025185554200, 0.028274348700, -0.032076686500, 0.038128681100, + -0.046738202500, 0.060444527700, -0.090299385000, 0.269453183200, + 0.118781343500, -0.030894968100, 0.010978749000, -0.003129872000, + -0.000282666000, 0.001376710400, -0.000043286100, -0.003043352800}, + { -0.025109965600, 0.028194618000, -0.031994340000, 0.038028249000, + -0.046633690800, 0.060342765300, -0.090208244500, 0.269732724800, + 0.118393292300, -0.030792208700, 0.010914101700, -0.003080051000, + -0.000337127500, 0.001433304900, -0.000085506800, -0.003010799000}, + { -0.025021832500, 0.028108476700, -0.031903279600, 0.037928400600, + -0.046534338000, 0.060237393200, -0.090110535200, 0.270004747300, + 0.118009163200, -0.030704739700, 0.010859951400, -0.003028924400, + -0.000385645100, 0.001481455800, -0.000129367200, -0.002971404300}, + { -0.024945375600, 0.028016349500, -0.031801130600, 0.037824453100, + 
-0.046429802200, 0.060137232300, -0.090009416000, 0.270285855600, + 0.117623618000, -0.030603272900, 0.010796286200, -0.002978759900, + -0.000428383200, 0.001531519200, -0.000177806100, -0.002939110000}, + { -0.024865514300, 0.027931517600, -0.031710668200, 0.037725963100, + -0.046320155000, 0.060028342200, -0.089907403100, 0.270554278500, + 0.117240864200, -0.030515733500, 0.010742257600, -0.002927315300, + -0.000479303700, 0.001570599400, -0.000219487800, -0.002900989400}, + { -0.024784044100, 0.027845171900, -0.031606514800, 0.037619046000, + -0.046212603400, 0.059912731300, -0.089796024200, 0.270824597700, + 0.116863187300, -0.030421218200, 0.010686302700, -0.002886726400, + -0.000524060200, 0.001616196800, -0.000261546500, -0.002863090600}, + { -0.024696294000, 0.027746587900, -0.031508865800, 0.037513479000, + -0.046095149100, 0.059794346400, -0.089682984500, 0.271082832700, + 0.116487577100, -0.030338939200, 0.010637555700, -0.002840569000, + -0.000568717900, 0.001661742100, -0.000305400500, -0.002834574300}, + { -0.024614179100, 0.027660621000, -0.031405176100, 0.037406419500, + -0.045986571500, 0.059689822100, -0.089576437400, 0.271346326500, + 0.116105596800, -0.030249393200, 0.010579370800, -0.002794808000, + -0.000608943300, 0.001697477500, -0.000344868200, -0.002798079400}, + { -0.024528121700, 0.027568162200, -0.031304965400, 0.037295135500, + -0.045873231900, 0.059565965700, -0.089466219000, 0.271612337400, + 0.115727569700, -0.030164681700, 0.010528906800, -0.002747163000, + -0.000654868000, 0.001744376300, -0.000389873200, -0.002768719300}, + { -0.024437594700, 0.027478273400, -0.031210219100, 0.037192230500, + -0.045758252600, 0.059449185200, -0.089354351300, 0.271872904900, + 0.115356267200, -0.030075860600, 0.010482924400, -0.002707124200, + -0.000689215900, 0.001788462900, -0.000419849000, -0.002735490700}, + { -0.024355993600, 0.027379891900, -0.031100796100, 0.037065976700, + -0.045634938700, 0.059329528700, -0.089228688900, 0.272129953900, + 
0.114983831800, -0.029994677500, 0.010435538400, -0.002661448300, + -0.000735439500, 0.001824188800, -0.000461001300, -0.002708812100}, + { -0.024270058800, 0.027286742200, -0.031000132500, 0.036954725600, + -0.045521223000, 0.059206370700, -0.089107334300, 0.272380972500, + 0.114620841300, -0.029915068400, 0.010387943600, -0.002628339500, + -0.000773306300, 0.001863568800, -0.000497529300, -0.002675426700}, + { -0.024182088200, 0.027193422300, -0.030886948300, 0.036835959600, + -0.045398291800, 0.059082508900, -0.088988209700, 0.272637989000, + 0.114246195000, -0.029830210100, 0.010335223100, -0.002588198100, + -0.000806920200, 0.001905685000, -0.000539287700, -0.002648556200}, + { -0.024087333600, 0.027097712600, -0.030784892400, 0.036723589400, + -0.045271977200, 0.058953274300, -0.088859686000, 0.272881898500, + 0.113886584200, -0.029750846500, 0.010301351000, -0.002549947700, + -0.000846562800, 0.001934940900, -0.000572476600, -0.002617432700}, + { -0.024001590900, 0.026993402700, -0.030679000400, 0.036606443900, + -0.045150554000, 0.058832779800, -0.088732072900, 0.273125908800, + 0.113513193200, -0.029676327000, 0.010252711500, -0.002513122700, + -0.000877067200, 0.001974049600, -0.000611735800, -0.002592531600}, + { -0.023913083300, 0.026898455900, -0.030563119800, 0.036484791400, + -0.045025119300, 0.058693934100, -0.088603102200, 0.273374591600, + 0.113154412600, -0.029598886400, 0.010207398600, -0.002481943700, + -0.000913965300, 0.002015094700, -0.000638597900, -0.002562081700}, + { -0.023815374100, 0.026799344000, -0.030457501700, 0.036367072300, + -0.044902786100, 0.058572790800, -0.088473828000, 0.273618006800, + 0.112791388300, -0.029513195500, 0.010165016900, -0.002444944300, + -0.000947146600, 0.002045354700, -0.000675641700, -0.002538546200}, + { -0.023725485500, 0.026701008800, -0.030349857900, 0.036247749400, + -0.044767731000, 0.058431854000, -0.088329074400, 0.273857995500, + 0.112436237300, -0.029450197300, 0.010130406600, -0.002412292800, + 
-0.000979178500, 0.002079209800, -0.000707489700, -0.002509178900}, + { -0.023639414500, 0.026596958900, -0.030232021100, 0.036123651800, + -0.044638462800, 0.058300810100, -0.088200445900, 0.274096422800, + 0.112076957700, -0.029366888100, 0.010090405600, -0.002377569500, + -0.001010435600, 0.002107901300, -0.000743082100, -0.002486798400}, + { -0.023547531800, 0.026496227300, -0.030121514300, 0.036000215000, + -0.044509951300, 0.058159209800, -0.088052895500, 0.274333594900, + 0.111721745400, -0.029301943900, 0.010052135800, -0.002353097400, + -0.001040420000, 0.002140548500, -0.000774072300, -0.002458072800}, + { -0.023446642300, 0.026393710400, -0.030010656200, 0.035875683000, + -0.044378805200, 0.058025057300, -0.087919930200, 0.274567466000, + 0.111361951300, -0.029228839400, 0.010016783300, -0.002322951600, + -0.001065376500, 0.002174932100, -0.000809275400, -0.002436583600}, + { -0.023356224600, 0.026296883500, -0.029893238400, 0.035750612400, + -0.044247467300, 0.057893310100, -0.087777694100, 0.274797647300, + 0.111004330000, -0.029157146400, 0.009982935500, -0.002295711000, + -0.001101449700, 0.002203852900, -0.000831096900, -0.002409751900}, + { -0.023265168600, 0.026184102500, -0.029776226200, 0.035620616000, + -0.044111852100, 0.057741079400, -0.087628679600, 0.275027780000, + 0.110654857700, -0.029083534200, 0.009941856300, -0.002266843400, + -0.001124397300, 0.002236651900, -0.000865190200, -0.002389152800}, + { -0.023164098500, 0.026080272800, -0.029663348400, 0.035494092700, + -0.043965976300, 0.057598214300, -0.087485388500, 0.275252475700, + 0.110312292600, -0.029019980500, 0.009918196800, -0.002237980900, + -0.001155997700, 0.002258762900, -0.000891896000, -0.002363396600}, + { -0.023071295000, 0.025980032600, -0.029541639900, 0.035363736500, + -0.043828523400, 0.057457528900, -0.087329639600, 0.275481985700, + 0.109957830700, -0.028949989000, 0.009886431100, -0.002211532800, + -0.001177431500, 0.002290080300, -0.000924936400, -0.002343785600}, 
+ { -0.022977857000, 0.025877315200, -0.029428661900, 0.035236477300, + -0.043694747100, 0.057307956100, -0.087183966300, 0.275705340200, + 0.109614224000, -0.028883563000, 0.009858208200, -0.002190231800, + -0.001207356700, 0.002311491600, -0.000951184900, -0.002318342500}, + { -0.022884324300, 0.025762575100, -0.029306783100, 0.035112146900, + -0.043556341700, 0.057163011400, -0.087035380900, 0.275924294400, + 0.109260641300, -0.028826112600, 0.009821204200, -0.002164204600, + -0.001227918200, 0.002341848700, -0.000983084300, -0.002299519300}, + { -0.022782491400, 0.025659050200, -0.029181547900, 0.034977723200, + -0.043413931200, 0.057015156000, -0.086882727400, 0.276151572300, + 0.108916066600, -0.028757551100, 0.009790138000, -0.002137523800, + -0.001252260400, 0.002366059500, -0.001001252000, -0.002275798800}, + { -0.022685765900, 0.025552190100, -0.029064307100, 0.034844676000, + -0.043271529100, 0.056868813300, -0.086719885800, 0.276361694700, + 0.108569779600, -0.028692402200, 0.009764314400, -0.002119185700, + -0.001277542100, 0.002395334200, -0.001032008700, -0.002257986500}, + { -0.022589602700, 0.025445511400, -0.028946376900, 0.034711343700, + -0.043130672100, 0.056708946400, -0.086558899300, 0.276581678800, + 0.108230473600, -0.028628118700, 0.009738656200, -0.002099057700, + -0.001292564500, 0.002420334600, -0.001056977600, -0.002234052300}, + { -0.022496001600, 0.025344724600, -0.028822677500, 0.034577788700, + -0.042988680400, 0.056561115200, -0.086406171500, 0.276798805200, + 0.107884769000, -0.028567755500, 0.009710099800, -0.002074387600, + -0.001314845000, 0.002440979000, -0.001085689500, -0.002217436800}, + { -0.022394383100, 0.025226614100, -0.028700012900, 0.034439922500, + -0.042840028100, 0.056405162700, -0.086243310500, 0.277004327300, + 0.107550806200, -0.028506052900, 0.009685232500, -0.002068874300, + -0.001333423100, 0.002464213000, -0.001108669600, -0.002195061100}, + { -0.022297564100, 0.025119808500, -0.028581781400, 
0.034305305000, + -0.042695656200, 0.056253954500, -0.086083834300, 0.277226300200, + 0.107203619900, -0.028443348800, 0.009654833100, -0.002042234800, + -0.001358263800, 0.002489758900, -0.001130552500, -0.002179375900}, + { -0.022199944300, 0.025011964800, -0.028462099000, 0.034168711000, + -0.042548622300, 0.056098971500, -0.085921130000, 0.277432448700, + 0.106864987700, -0.028388245500, 0.009632664400, -0.002024860800, + -0.001370844300, 0.002512274500, -0.001153285000, -0.002157159200}, + { -0.022094548100, 0.024903208900, -0.028330041100, 0.034025320000, + -0.042394273800, 0.055936365900, -0.085747792700, 0.277639650500, + 0.106531925400, -0.028326674100, 0.009610046500, -0.002009028700, + -0.001396393300, 0.002530754500, -0.001179865400, -0.002142396800}, + { -0.021995201800, 0.024793027300, -0.028206269200, 0.033896113600, + -0.042248483600, 0.055780937400, -0.085583072500, 0.277843916500, + 0.106192034600, -0.028266589600, 0.009595095600, -0.001993279700, + -0.001408007500, 0.002552535400, -0.001202056000, -0.002120617500}, + { -0.021901583000, 0.024677037500, -0.028084273300, 0.033757782300, + -0.042099685900, 0.055624216900, -0.085416983300, 0.278047516100, + 0.105852072900, -0.028207292800, 0.009568367500, -0.001970246700, + -0.001429511100, 0.002575063400, -0.001221321700, -0.002106926400}, + { -0.021801611400, 0.024565652100, -0.027960051300, 0.033615404000, + -0.041944857600, 0.055460095600, -0.085239563600, 0.278250526200, + 0.105517476900, -0.028154247100, 0.009549327200, -0.001957811700, + -0.001451556400, 0.002589582900, -0.001241753500, -0.002086264900}, + { -0.021699539700, 0.024460899000, -0.027831832100, 0.033477267300, + -0.041798158600, 0.055292010700, -0.085067297700, 0.278450786400, + 0.105189430300, -0.028099508700, 0.009524686500, -0.001949621100, + -0.001461338600, 0.002611261800, -0.001266673600, -0.002073264900}, + { -0.021599986800, 0.024350066900, -0.027708847400, 0.033335865400, + -0.041644581200, 0.055128391400, -0.084889121800, 
0.278653475900, + 0.104851735700, -0.028039339300, 0.009510206500, -0.001933711100, + -0.001475643600, 0.002624167700, -0.001286367400, -0.002053034800}, + { -0.021501456000, 0.024240038400, -0.027586029200, 0.033194974400, + -0.041491644800, 0.054965250700, -0.084713840400, 0.278849784900, + 0.104526215400, -0.027986541700, 0.009490451500, -0.001918547800, + -0.001486507400, 0.002645934000, -0.001311257700, -0.002040050600}, + { -0.021403770100, 0.024118477100, -0.027455391500, 0.033058704200, + -0.041335183200, 0.054809534600, -0.084538637100, 0.279042526200, + 0.104190543900, -0.027927521300, 0.009462422300, -0.001906698600, + -0.001502518600, 0.002663196100, -0.001323447400, -0.002021097800}, + { -0.021298248700, 0.024009912800, -0.027321991400, 0.032911712700, + -0.041174711300, 0.054635449300, -0.084361255500, 0.279243346600, + 0.103861290100, -0.027880462700, 0.009455425900, -0.001900093500, + -0.001518047500, 0.002684579000, -0.001347505300, -0.002009220500}, + { -0.021198957800, 0.023898644100, -0.027197497600, 0.032768596100, + -0.041018806300, 0.054468678800, -0.084179631800, 0.279432441200, + 0.103538923200, -0.027828661600, 0.009436950100, -0.001885898300, + -0.001530065500, 0.002695060500, -0.001364801200, -0.001990835900}, + { -0.021097697500, 0.023785545000, -0.027071437200, 0.032621932300, + -0.040870290800, 0.054302452500, -0.083997214900, 0.279619234800, + 0.103205041100, -0.027767473800, 0.009419594300, -0.001879709700, + -0.001538161700, 0.002715202100, -0.001388824500, -0.001978335000}, + { -0.020996580900, 0.023672059200, -0.026941818000, 0.032485359700, + -0.040714985700, 0.054134314800, -0.083810986900, 0.279815921900, + 0.102875847200, -0.027719967200, 0.009399847000, -0.001863555500, + -0.001552949300, 0.002731039900, -0.001399562800, -0.001960990100}, + { -0.020894037400, 0.023552023700, -0.026814717500, 0.032339646100, + -0.040555637300, 0.053961014100, -0.083632987700, 0.280004451900, + 0.102547554800, -0.027669483100, 0.009389127400, 
-0.001850389700, + -0.001565501700, 0.002743236000, -0.001421126700, -0.001950196300}, + { -0.020796180800, 0.023444081100, -0.026681026200, 0.032192706400, + -0.040395032300, 0.053787235200, -0.083442004400, 0.280184129200, + 0.102229591200, -0.027619025000, 0.009370146400, -0.001848435300, + -0.001568643300, 0.002757857900, -0.001437420100, -0.001933077600}, + { -0.020695687800, 0.023331738800, -0.026554662000, 0.032046118400, + -0.040233651400, 0.053610471900, -0.083256870000, 0.280377353300, + 0.101900357800, -0.027567373500, 0.009358983900, -0.001835136600, + -0.001581275300, 0.002770198900, -0.001459273400, -0.001922140900}, + { -0.020596163500, 0.023219719400, -0.026426870800, 0.031911263600, + -0.040078628800, 0.053441716200, -0.083070602600, 0.280564653200, + 0.101581276800, -0.027518324200, 0.009347882600, -0.001837111200, + -0.001593896400, 0.002791306400, -0.001469185600, -0.001906217800}, + { -0.020486105200, 0.023102669400, -0.026295748600, 0.031757335000, + -0.039919465700, 0.053275619900, -0.082878251100, 0.280742505700, + 0.101258880600, -0.027473598000, 0.009331442300, -0.001823983800, + -0.001605491000, 0.002802122100, -0.001489715900, -0.001896148500}, + { -0.020386038100, 0.022990763600, -0.026169842500, 0.031611536400, + -0.039758154000, 0.053098254300, -0.082692676700, 0.280922614500, + 0.100932867600, -0.027421788700, 0.009318026600, -0.001820868000, + -0.001613216100, 0.002809677500, -0.001504548000, -0.001879711300}, + { -0.020285435000, 0.022877455000, -0.026042393200, 0.031463335700, + -0.039594468800, 0.052919415400, -0.082490397700, 0.281103932300, + 0.100609916000, -0.027373640000, 0.009311303900, -0.001812957700, + -0.001617884900, 0.002826213600, -0.001525588000, -0.001869926500}, + { -0.020192073400, 0.022761936700, -0.025903756400, 0.031323202100, + -0.039434316600, 0.052741885600, -0.082304345000, 0.281286138200, + 0.100291413500, -0.027321921500, 0.009296869200, -0.001808517000, + -0.001627660100, 0.002838490600, 
-0.001533110500, -0.001855192900}, + { -0.020082641400, 0.022645343300, -0.025773553200, 0.031169977600, + -0.039274706000, 0.052574179500, -0.082107814900, 0.281460005200, + 0.099967050900, -0.027269207300, 0.009282507800, -0.001804192200, + -0.001636760100, 0.002847881000, -0.001552740800, -0.001845760100}, + { -0.019981165400, 0.022530400400, -0.025642740200, 0.031017613200, + -0.039105374000, 0.052385802800, -0.081905722200, 0.281637176300, + 0.099657892600, -0.027230252500, 0.009272886300, -0.001799068700, + -0.001637727000, 0.002860056400, -0.001566710200, -0.001830818000}, + { -0.019880568400, 0.022416781800, -0.025512202200, 0.030878936400, + -0.038945254600, 0.052207335300, -0.081716496200, 0.281816351400, + 0.099338786400, -0.027175974500, 0.009256393500, -0.001792635900, + -0.001649711300, 0.002875237500, -0.001579820600, -0.001822507700}, + { -0.019782364100, 0.022306311600, -0.025387269800, 0.030733042100, + -0.038783583100, 0.052030462600, -0.081515568600, 0.281989192800, + 0.099021663300, -0.027135898100, 0.009251959300, -0.001785903000, + -0.001655707600, 0.002880377400, -0.001592362400, -0.001808127200}, + { -0.019671249500, 0.022187350300, -0.025253455800, 0.030574773900, + -0.038616812900, 0.051850662000, -0.081315971000, 0.282153907600, + 0.098702548800, -0.027086143300, 0.009241521900, -0.001786417100, + -0.001657398400, 0.002894723100, -0.001611646700, -0.001799471600}, + { -0.019570606700, 0.022073795700, -0.025122240000, 0.030434707800, + -0.038454440300, 0.051668545300, -0.081120147400, 0.282326299200, + 0.098383808800, -0.027042545900, 0.009231168400, -0.001786186700, + -0.001662356200, 0.002899683900, -0.001624082400, -0.001785075000}, + { -0.019479635100, 0.021960407900, -0.024988398200, 0.030286004000, + -0.038289759200, 0.051487224000, -0.080910856000, 0.282503350600, + 0.098066057000, -0.026984567000, 0.009223573800, -0.001783286000, + -0.001671632200, 0.002912741900, -0.001635839700, -0.001778008400}, + { -0.019377155600, 
0.021844654800, -0.024856282600, 0.030128070300, + -0.038122909500, 0.051306676900, -0.080708954700, 0.282668132600, + 0.097753369800, -0.026946759500, 0.009222427700, -0.001780604500, + -0.001670895300, 0.002923256000, -0.001648349000, -0.001764156100}, + { -0.019269257400, 0.021728932000, -0.024723351000, 0.029986133000, + -0.037958156200, 0.051121120600, -0.080507440900, 0.282834989500, + 0.097436064800, -0.026902305100, 0.009211401200, -0.001779605600, + -0.001677015500, 0.002930209300, -0.001665754600, -0.001756624600}, + { -0.019170951200, 0.021617945900, -0.024597228000, 0.029837934200, + -0.037790101700, 0.050945404700, -0.080311308100, 0.283008922400, + 0.097122904600, -0.026855831200, 0.009197158400, -0.001772074700, + -0.001678044700, 0.002939243500, -0.001670904800, -0.001743866400}, + { -0.019071619200, 0.021505473200, -0.024469290200, 0.029686120900, + -0.037632549500, 0.050764840500, -0.080100778400, 0.283171957600, + 0.096806832700, -0.026811221400, 0.009186304800, -0.001771162500, + -0.001683731000, 0.002945624400, -0.001687977400, -0.001736577200}, + { -0.018968488000, 0.021388132400, -0.024333413500, 0.029538083300, + -0.037457346700, 0.050578218900, -0.079887593500, 0.283337590700, + 0.096492955100, -0.026767354800, 0.009192163800, -0.001770832400, + -0.001681516300, 0.002955079600, -0.001699869700, -0.001723295500}, + { -0.018862478100, 0.021274150600, -0.024204158900, 0.029384739600, + -0.037297550600, 0.050392415300, -0.079683587600, 0.283504106100, + 0.096183264500, -0.026722407800, 0.009180232100, -0.001768308200, + -0.001689724600, 0.002967018600, -0.001710235900, -0.001717442700}, + { -0.018761561600, 0.021159356200, -0.024070689600, 0.029241121700, + -0.037129754000, 0.050201664600, -0.079472717100, 0.283660491100, + 0.095870802000, -0.026679621900, 0.009171925300, -0.001770702400, + -0.001691965100, 0.002969448100, -0.001720493300, -0.001704658800}, + { -0.018668595300, 0.021041132500, -0.023942524200, 0.029090435800, + -0.036957408700, 
0.050019918900, -0.079266196800, 0.283824708400, + 0.095561100700, -0.026630607200, 0.009168743300, -0.001772231300, + -0.001696435000, 0.002974894800, -0.001736659300, -0.001698209200}, + { -0.018571534300, 0.020933121700, -0.023805663500, 0.028933443100, + -0.036793287300, 0.049827569900, -0.079052887900, 0.283981176300, + 0.095258665900, -0.026591723800, 0.009164052800, -0.001778570200, + -0.001691727400, 0.002982552200, -0.001746931600, -0.001686115500}, + { -0.018463951300, 0.020817422100, -0.023671874000, 0.028788840800, + -0.036622085000, 0.049644890900, -0.078843755900, 0.284139414500, + 0.094937302500, -0.026549370400, 0.009150120900, -0.001771057900, + -0.001693172900, 0.002992954400, -0.001756770400, -0.001680418200}, + { -0.018364139000, 0.020703740000, -0.023541974700, 0.028632330500, + -0.036454574300, 0.049461377000, -0.078632258000, 0.284297024000, + 0.094630560000, -0.026501577500, 0.009148770700, -0.001774906500, + -0.001694643800, 0.002994642500, -0.001766316400, -0.001668313800}, + { -0.018266186600, 0.020592415900, -0.023412213700, 0.028492007500, + -0.036289687800, 0.049272679100, -0.078421164800, 0.284457324700, + 0.094316696700, -0.026457830700, 0.009146283300, -0.001777143500, + -0.001698314300, 0.002999487700, -0.001781999900, -0.001662265200}, + { -0.018165847100, 0.020477739000, -0.023280404100, 0.028333282800, + -0.036119295000, 0.049084885500, -0.078204266000, 0.284608657200, + 0.094014760000, -0.026416553200, 0.009139701600, -0.001781460200, + -0.001696266500, 0.003012636900, -0.001785770200, -0.001651378600}, + { -0.018061008600, 0.020364436200, -0.023149218600, 0.028191490700, + -0.035952564600, 0.048893523500, -0.077988760200, 0.284764041400, + 0.093702911100, -0.026373328800, 0.009137987500, -0.001784607900, + -0.001699031600, 0.003016678700, -0.001800824700, -0.001645793200}, + { -0.017963440300, 0.020253539400, -0.023021819100, 0.028040064200, + -0.035778114400, 0.048706755700, -0.077771874700, 0.284916529600, + 0.093393907400, 
-0.026335334700, 0.009129169500, -0.001782780900, + -0.001693615300, 0.003016455700, -0.001809252200, -0.001634392700}, + { -0.017864500900, 0.020140353900, -0.022889140600, 0.027892719100, + -0.035614488000, 0.048525355700, -0.077560338000, 0.285077698000, + 0.093086772800, -0.026290376100, 0.009124974400, -0.001782837800, + -0.001700124000, 0.003027010500, -0.001818503700, -0.001629703000}, + { -0.017767797800, 0.020029767800, -0.022762632500, 0.027742539400, + -0.035443127000, 0.048328815000, -0.077336969100, 0.285223614300, + 0.092784329800, -0.026242686200, 0.009124739900, -0.001788644700, + -0.001699564500, 0.003026957800, -0.001826555800, -0.001618684700}, + { -0.017673081200, 0.019908421800, -0.022626375700, 0.027591688900, + -0.035275280700, 0.048141797500, -0.077116537300, 0.285372738700, + 0.092474868600, -0.026199049700, 0.009123179000, -0.001792125700, + -0.001701864700, 0.003030366000, -0.001841112200, -0.001613422400}, + { -0.017567749800, 0.019794038100, -0.022494731800, 0.027432475300, + -0.035103343800, 0.047947719600, -0.076901860200, 0.285524247500, + 0.092174773000, -0.026162495500, 0.009115656500, -0.001791780100, + -0.001692544800, 0.003041601400, -0.001843678200, -0.001603558400}, + { -0.017470863500, 0.019683203200, -0.022364869500, 0.027291613500, + -0.034936118500, 0.047753629900, -0.076679031600, 0.285671811500, + 0.091863686900, -0.026115241800, 0.009107820400, -0.001801618700, + -0.001693384100, 0.003044500800, -0.001858063000, -0.001598323500}, + { -0.017374543300, 0.019573449500, -0.022237957200, 0.027136848800, + -0.034768617100, 0.047567511000, -0.076458633000, 0.285823700200, + 0.091556728800, -0.026079677000, 0.009109488100, -0.001807759500, + -0.001692821000, 0.003044301800, -0.001865780400, -0.001587867000}, + { -0.017278016500, 0.019462639700, -0.022107866200, 0.026994361100, + -0.034597273100, 0.047381154500, -0.076238222500, 0.285975563800, + 0.091251439600, -0.026032138000, 0.009103004000, -0.001803039800, + 
-0.001692015100, 0.003052700600, -0.001873935800, -0.001583828300}, + { -0.017174506000, 0.019350009900, -0.021978862300, 0.026838198900, + -0.034430712700, 0.047182370700, -0.076007137400, 0.286112971700, + 0.090946916500, -0.025990224500, 0.009104300700, -0.001810064900, + -0.001690153500, 0.003051424200, -0.001880890400, -0.001573670100}, + { -0.017077386900, 0.019238909500, -0.021847402400, 0.026693327400, + -0.034255105000, 0.046987841400, -0.075788745300, 0.286262228400, + 0.090639789400, -0.025952532400, 0.009104248700, -0.001814795900, + -0.001691600600, 0.003054176700, -0.001894703800, -0.001569153000}, + { -0.016981596000, 0.019128983200, -0.021720457700, 0.026538690300, + -0.034087052200, 0.046799004200, -0.075562967900, 0.286407594900, + 0.090337279400, -0.025905423200, 0.009096027200, -0.001821529400, + -0.001680332600, 0.003064309200, -0.001896839700, -0.001559561500}, + { -0.016884247400, 0.019017118400, -0.021587898200, 0.026389844800, + -0.033919719800, 0.046610354100, -0.075335881700, 0.286552421600, + 0.090031502000, -0.025867448300, 0.009095852900, -0.001826242300, + -0.001681411600, 0.003066530900, -0.001910366600, -0.001555232200}, + { -0.016783704800, 0.018907663300, -0.021462324800, 0.026239733700, + -0.033746731200, 0.046405855900, -0.075110245000, 0.286693376000, + 0.089731283500, -0.025821879500, 0.009092452800, -0.001825134600, + -0.001676528000, 0.003070488600, -0.001910937800, -0.001546377000}, + { -0.016688282800, 0.018797704300, -0.021331826000, 0.026093533900, + -0.033581867500, 0.046219477100, -0.074884840400, 0.286843179800, + 0.089429913200, -0.025781349400, 0.009085971800, -0.001835675000, + -0.001676580800, 0.003072425300, -0.001924344300, -0.001542197500}, + { -0.016594569100, 0.018690756000, -0.021207672000, 0.025943420100, + -0.033404937000, 0.046023517900, -0.074664683000, 0.286977836300, + 0.089126683900, -0.025743532000, 0.009086549100, -0.001841296500, + -0.001676983800, 0.003074065100, -0.001937151500, -0.001538275800}, 
+ { -0.016498716200, 0.018580469200, -0.021077164200, 0.025796245500, + -0.033239161900, 0.045835202700, -0.074435467900, 0.287119707900, + 0.088817903900, -0.025695953100, 0.009088750200, -0.001840528300, + -0.001672419000, 0.003078214900, -0.001938044200, -0.001529263800}, + { -0.016399128000, 0.018471685500, -0.020951717000, 0.025642665600, + -0.033070898400, 0.045645401700, -0.074204448300, 0.287263294400, + 0.088518791800, -0.025655699800, 0.009083077500, -0.001851968500, + -0.001671519700, 0.003079287600, -0.001950784000, -0.001525545100}, + { -0.016303461100, 0.018361514000, -0.020820608400, 0.025497283800, + -0.032893481400, 0.045445650100, -0.073974168600, 0.287398826600, + 0.088215611900, -0.025616230100, 0.009082036700, -0.001853404200, + -0.001663416900, 0.003076434500, -0.001956698100, -0.001516114100}, + { -0.016215838300, 0.018247246600, -0.020690412400, 0.025351871200, + -0.032729171400, 0.045259152900, -0.073748515200, 0.287535419700, + 0.087916911500, -0.025573878100, 0.009073875600, -0.001862252200, + -0.001665725900, 0.003083968700, -0.001963597200, -0.001513327100}, + { -0.016122658700, 0.018140302000, -0.020565897500, 0.025198358100, + -0.032563221800, 0.045054475300, -0.073516228400, 0.287669639200, + 0.087611730400, -0.025527589100, 0.009078560600, -0.001864713500, + -0.001657013000, 0.003080858100, -0.001969379300, -0.001503969900}, + { -0.016030164800, 0.018033299200, -0.020438589400, 0.025057752300, + -0.032390709200, 0.044862930000, -0.073281372400, 0.287808844900, + 0.087312662900, -0.025484515600, 0.009070381000, -0.001873996900, + -0.001658952100, 0.003088203600, -0.001976310900, -0.001501262300}, + { -0.015931594800, 0.017925901000, -0.020314095600, 0.024903471000, + -0.032220613500, 0.044666232000, -0.073051883700, 0.287948850700, + 0.087007865500, -0.025444714700, 0.009076396600, -0.001876815400, + -0.001650306000, 0.003085015800, -0.001981877500, -0.001492305300}, + { -0.015837646000, 0.017817156300, -0.020183755000, 
0.024755980300, + -0.032052144100, 0.044470365700, -0.072825294900, 0.288078851000, + 0.086711998100, -0.025403194900, 0.009070080400, -0.001888369000, + -0.001648979900, 0.003085609100, -0.001993991000, -0.001488858500}, + { -0.015745786000, 0.017710651600, -0.020057087700, 0.024614982500, + -0.031879037500, 0.044276425800, -0.072585572700, 0.288212217900, + 0.086414765800, -0.025357212300, 0.009074959300, -0.001891836000, + -0.001636592700, 0.003094108300, -0.001994653500, -0.001480767400}, + { -0.015654004700, 0.017605115800, -0.019933041100, 0.024461035300, + -0.031707875300, 0.044077625700, -0.072351635400, 0.288347603800, + 0.086112304400, -0.025320844800, 0.009069894600, -0.001903992100, + -0.001634855100, 0.003094518400, -0.002006818100, -0.001477449400}, + { -0.015556426100, 0.017497985500, -0.019805930700, 0.024316777100, + -0.031543979900, 0.043889313200, -0.072118967800, 0.288475887700, + 0.085809125600, -0.025278532500, 0.009073340000, -0.001904355000, + -0.001629128000, 0.003097408700, -0.002006309300, -0.001469685300}, + { -0.015464035900, 0.017390367400, -0.019676404700, 0.024169673500, + -0.031375546100, 0.043691563100, -0.071884560100, 0.288613216300, + 0.085517904200, -0.025234780000, 0.009063993200, -0.001909503200, + -0.001621883700, 0.003096710200, -0.002018075200, -0.001466497000}, + { -0.015372811900, 0.017282467800, -0.019563622700, 0.024025743900, + -0.031201529400, 0.043494252000, -0.071653858800, 0.288739140200, + 0.085215783300, -0.025192315000, 0.009065729600, -0.001924642300, + -0.001617413500, 0.003093622800, -0.002023130000, -0.001457861300}, + { -0.015281581500, 0.017176704600, -0.019436707600, 0.023880818700, + -0.031035919800, 0.043302468300, -0.071411887600, 0.288870320800, + 0.084911325500, -0.025147413900, 0.009067220900, -0.001923185400, + -0.001613820700, 0.003099574200, -0.002029502500, -0.001455518600}, + { -0.015190068200, 0.017070271400, -0.019307909900, 0.023733865500, + -0.030866540700, 0.043102902900, -0.071173336500, 
0.289002944200, + 0.084617972200, -0.025112304400, 0.009063501400, -0.001937412700, + -0.001609315800, 0.003096284800, -0.002034312100, -0.001447330000}, + { -0.015095192700, 0.016966497900, -0.019187203800, 0.023583328300, + -0.030698827600, 0.042906585900, -0.070941485100, 0.289127389400, + 0.084313384800, -0.025064553400, 0.009058701400, -0.001941756600, + -0.001604479400, 0.003101858000, -0.002040355800, -0.001445044100}, + { -0.015005498900, 0.016861988700, -0.019061283500, 0.023439043400, + -0.030533338900, 0.042714039500, -0.070696301300, 0.289257793800, + 0.084018453400, -0.025022964500, 0.009063774800, -0.001944654100, + -0.001595199500, 0.003097508000, -0.002044866400, -0.001436913900}, + { -0.014917519700, 0.016759937100, -0.018938392400, 0.023301552900, + -0.030361552500, 0.042516727200, -0.070462608200, 0.289383840200, + 0.083722905800, -0.024979066000, 0.009063562600, -0.001958070000, + -0.001592655700, 0.003097143300, -0.002056302800, -0.001434061400}, + { -0.014829329300, 0.016657963500, -0.018818329000, 0.023150063400, + -0.030188546200, 0.042326526200, -0.070230755300, 0.289510095600, + 0.083425985400, -0.024932767200, 0.009060147700, -0.001964069200, + -0.001585752700, 0.003099557300, -0.002055331700, -0.001426747600}, + { -0.014740487700, 0.016554974400, -0.018693430700, 0.023006696000, + -0.030022736600, 0.042129344000, -0.069991474400, 0.289643060400, + 0.083121682100, -0.024891262300, 0.009057517500, -0.001970699600, + -0.001577810900, 0.003098336700, -0.002066861800, -0.001423747700}, + { -0.014646034300, 0.016450370300, -0.018567891400, 0.022863023200, + -0.029856412200, 0.041932542200, -0.069755255200, 0.289764874500, + 0.082826383600, -0.024845070000, 0.009054717400, -0.001980678900, + -0.001582625700, 0.003096053600, -0.002064831500, -0.001416907600}, + { -0.014559627800, 0.016349891900, -0.018449559000, 0.022713561300, + -0.029689051900, 0.041737590700, -0.069503998900, 0.289888685100, + 0.082532448100, -0.024801087800, 0.009058174200, 
-0.001982046700, + -0.001575159300, 0.003094262100, -0.002075770200, -0.001414337900}, + { -0.014471061400, 0.016243913500, -0.018334760000, 0.022579662500, + -0.029519935000, 0.041541189800, -0.069267292000, 0.290011569200, + 0.082230235800, -0.024758694700, 0.009055155500, -0.001988704700, + -0.001567128600, 0.003092724100, -0.002086898100, -0.001411478900}, + { -0.014383790400, 0.016142525900, -0.018211358000, 0.022437490000, + -0.029354488900, 0.041343948500, -0.069028755000, 0.290131292800, + 0.081930443400, -0.024718526100, 0.009055027000, -0.002001893100, + -0.001564993600, 0.003095765300, -0.002085806300, -0.001404564500}, + { -0.014297586900, 0.016042263500, -0.018092816300, 0.022287585300, + -0.029185104300, 0.041142095700, -0.068780397800, 0.290253164200, + 0.081635398000, -0.024670766100, 0.009051397000, -0.002008669700, + -0.001556387000, 0.003093691000, -0.002096680100, -0.001401882300}, + { -0.014206281900, 0.015941116300, -0.017970542200, 0.022146976500, + -0.029021815100, 0.040946718000, -0.068542945900, 0.290377478100, + 0.081340267800, -0.024629081800, 0.009048347000, -0.002014963700, + -0.001548946600, 0.003095699600, -0.002095226400, -0.001395190900}, + { -0.014118938600, 0.015839406100, -0.017846481100, 0.022002842700, + -0.028850387900, 0.040754474000, -0.068302308800, 0.290494347700, + 0.081040072300, -0.024586190700, 0.009045183800, -0.002021575300, + -0.001540744900, 0.003093740800, -0.002106039500, -0.001392505100}, + { -0.014033359700, 0.015739387800, -0.017724211200, 0.021861411400, + -0.028685336800, 0.040555731500, -0.068055928700, 0.290621547300, + 0.080744453800, -0.024543547500, 0.009041815800, -0.002027861900, + -0.001533174400, 0.003095698800, -0.002104702000, -0.001385829000}, + { -0.013950756900, 0.015643569600, -0.017610720400, 0.021716480000, + -0.028520619600, 0.040359106100, -0.067815030400, 0.290742857900, + 0.080448884900, -0.024496193000, 0.009046978300, -0.002042499600, + -0.001529706600, 0.003094623000, 
-0.002115681000, -0.001383359700}, + { -0.013857978600, 0.015536744300, -0.017494757400, 0.021577567700, + -0.028357551400, 0.040162405000, -0.067572563200, 0.290861113600, + 0.080155270500, -0.024453193600, 0.009043386000, -0.002048697200, + -0.001522077700, 0.003096377800, -0.002113994500, -0.001376849300}, + { -0.013773229200, 0.015437926100, -0.017374226900, 0.021440417100, + -0.028180320800, 0.039967168700, -0.067326482200, 0.290971758300, + 0.079856769100, -0.024409237700, 0.009039889200, -0.002055553100, + -0.001513509600, 0.003093955600, -0.002124476100, -0.001374342300}, + { -0.013689059900, 0.015339335300, -0.017253688600, 0.021300179900, + -0.028015676300, 0.039767446600, -0.067075438600, 0.291093788100, + 0.079560972200, -0.024359858000, 0.009042893100, -0.002064956100, + -0.001502425900, 0.003088564100, -0.002128108700, -0.001366787300}, + { -0.013608286800, 0.015245837400, -0.017142104300, 0.021157264500, + -0.027853162500, 0.039572395400, -0.066833494600, 0.291213669600, + 0.079258669800, -0.024318259900, 0.009036431600, -0.002066451500, + -0.001504139700, 0.003088416900, -0.002132688700, -0.001365518600}, + { -0.013526024300, 0.015149195700, -0.017023199600, 0.021019023100, + -0.027690881700, 0.039375542700, -0.066588317200, 0.291332516600, + 0.078972791400, -0.024276630700, 0.009034254800, -0.002074639200, + -0.001493462900, 0.003082938500, -0.002135814900, -0.001358430400}, + { -0.013436720300, 0.015049535100, -0.016902143900, 0.020877384400, + -0.027520351800, 0.039182293600, -0.066341740000, 0.291445410400, + 0.078678890800, -0.024226476800, 0.009036993800, -0.002087255200, + -0.001492702300, 0.003088470800, -0.002141367000, -0.001357082800}, + { -0.013351892900, 0.014946715800, -0.016789045200, 0.020740566100, + -0.027358305900, 0.038984672800, -0.066093324400, 0.291557196100, + 0.078381474100, -0.024188073400, 0.009035188200, -0.002095570400, + -0.001481921800, 0.003082911700, -0.002144524900, -0.001349842500}, + { -0.013270943900, 
0.014852276800, -0.016672927800, 0.020605326600, + -0.027198631300, 0.038789809200, -0.065848400700, 0.291673553600, + 0.078077054200, -0.024136747400, 0.009027973200, -0.002105937200, + -0.001475208500, 0.003087524600, -0.002150215500, -0.001348116400}, + { -0.013190641200, 0.014758176300, -0.016560228700, 0.020459538900, + -0.027027766200, 0.038596149400, -0.065599338300, 0.291786795400, + 0.077792721000, -0.024094239400, 0.009025651700, -0.002114130700, + -0.001464447200, 0.003081710900, -0.002153019500, -0.001341225900}, + { -0.013110011800, 0.014663919000, -0.016444050200, 0.020323488600, + -0.026866946500, 0.038399388600, -0.065350608700, 0.291898436700, + 0.077491289100, -0.024045414700, 0.009025408000, -0.002119522600, + -0.001458535300, 0.003086316400, -0.002158494200, -0.001339767200}, + { -0.013025077600, 0.014569262800, -0.016328233200, 0.020188140300, + -0.026707041800, 0.038203933500, -0.065102449400, 0.292014040700, + 0.077196425000, -0.023998381000, 0.009026360900, -0.002125747900, + -0.001454699900, 0.003075666900, -0.002160549700, -0.001332912300}, + { -0.012943770500, 0.014473806100, -0.016209483700, 0.020047933100, + -0.026536730800, 0.038005130000, -0.064858733100, 0.292129090700, + 0.076903557900, -0.023957398700, 0.009021448800, -0.002130265600, + -0.001449188900, 0.003080409800, -0.002165817700, -0.001331821400}, + { -0.012862479300, 0.014374613700, -0.016100609100, 0.019915634000, + -0.026378597000, 0.037810235300, -0.064609585900, 0.292243989500, + 0.076607212500, -0.023906702500, 0.009015226300, -0.002142422000, + -0.001439416300, 0.003077588100, -0.002176246500, -0.001329411400}, + { -0.012776898100, 0.014287400700, -0.015987370400, 0.019781456700, + -0.026218842900, 0.037613307800, -0.064357589600, 0.292354833000, + 0.076314227000, -0.023860018100, 0.009017528200, -0.002151344500, + -0.001428408900, 0.003071783900, -0.002179145600, -0.001322405100}, + { -0.012699499300, 0.014196955700, -0.015878862800, 0.019639469300, + -0.026050975100, 
0.037421391100, -0.064106762900, 0.292466530500, + 0.076019700600, -0.023810871100, 0.009017151300, -0.002156670200, + -0.001422547300, 0.003076233500, -0.002184458600, -0.001321218400}, + { -0.012616476800, 0.014104155900, -0.015764904400, 0.019506037600, + -0.025892093800, 0.037225319700, -0.063853917500, 0.292576319400, + 0.075724712100, -0.023759323400, 0.009010001600, -0.002167557500, + -0.001417698000, 0.003065150800, -0.002186268300, -0.001314354900}, + { -0.012537078000, 0.014007257400, -0.015657767200, 0.019375159900, + -0.025735481500, 0.037031767000, -0.063607547900, 0.292680753800, + 0.075427591700, -0.023721555300, 0.009004937300, -0.002171714300, + -0.001412157700, 0.003069674600, -0.002191380800, -0.001313321900}, + { -0.012457823800, 0.013913853300, -0.015541317500, 0.019236546600, + -0.025565504100, 0.036831074500, -0.063356530800, 0.292786821600, + 0.075136252200, -0.023673990600, 0.009007040500, -0.002180765400, + -0.001401081300, 0.003063717500, -0.002193998000, -0.001306486000}, + { -0.012381206600, 0.013823584100, -0.015429059800, 0.019104315500, + -0.025407276700, 0.036634397500, -0.063101285300, 0.292893802300, + 0.074841359700, -0.023620522900, 0.008998337100, -0.002190657800, + -0.001393998700, 0.003067771800, -0.002199287800, -0.001305173000}, + { -0.012304800000, 0.013733603800, -0.015316494200, 0.018970177400, + -0.025242884300, 0.036443834300, -0.062848029800, 0.293006277700, + 0.074548400000, -0.023577987000, 0.009001070400, -0.002199540200, + -0.001383193600, 0.003061911500, -0.002201990500, -0.001298486200}, + { -0.012231594800, 0.013648384700, -0.015213765300, 0.018835062100, + -0.025085987100, 0.036250520900, -0.062599496300, 0.293108050300, + 0.074248575600, -0.023530332000, 0.008998523600, -0.002201711800, + -0.001384640400, 0.003061515200, -0.002206188900, -0.001297565000}, + { -0.012146903000, 0.013548919900, -0.015104288800, 0.018700312600, + -0.024918766800, 0.036051519300, -0.062347556700, 0.293216599600, + 0.073961694600, 
-0.023479470700, 0.008992759700, -0.002215250000, + -0.001372665000, 0.003055268400, -0.002208668300, -0.001290828100}, + { -0.012072689000, 0.013461432300, -0.014994590500, 0.018570057000, + -0.024762400200, 0.035856112000, -0.062091368600, 0.293326498300, + 0.073667719600, -0.023433472500, 0.008991998000, -0.002220358400, + -0.001366666100, 0.003059661400, -0.002213970900, -0.001289852900}, + { -0.011998777300, 0.013374412100, -0.014885760000, 0.018440114900, + -0.024601980400, 0.035669418700, -0.061843667600, 0.293431988400, + 0.073372194300, -0.023379395200, 0.008991178300, -0.002234495000, + -0.001354819600, 0.003053533900, -0.002216603000, -0.001282913700}, + { -0.011922344800, 0.013284279500, -0.014771995300, 0.018300582900, + -0.024446846900, 0.035466290700, -0.061588331200, 0.293533527800, + 0.073072951600, -0.023330451200, 0.008988073400, -0.002236664600, + -0.001356054900, 0.003053154500, -0.002220878800, -0.001282008300}, + { -0.011845461600, 0.013189357000, -0.014666643200, 0.018169987600, + -0.024284154200, 0.035275317400, -0.061329463600, 0.293638750200, + 0.072780111500, -0.023282772800, 0.008982191000, -0.002249832100, + -0.001344236100, 0.003046802300, -0.002223339400, -0.001275293500}, + { -0.011769115200, 0.013103666900, -0.014559744600, 0.018043115400, + -0.024131925000, 0.035084175400, -0.061079622500, 0.293746271400, + 0.072492005600, -0.023234912400, 0.008978152700, -0.002247134700, + -0.001333772300, 0.003050212200, -0.002228201400, -0.001274483600}, + { -0.011694882200, 0.013015910500, -0.014448925000, 0.017910112100, + -0.023966290300, 0.034885265900, -0.060823698800, 0.293850684700, + 0.072198731500, -0.023185828700, 0.008970913900, -0.002258841200, + -0.001323837000, 0.003046799700, -0.002238111400, -0.001272408000}, + { -0.011623815900, 0.012932068300, -0.014343601700, 0.017784544000, + -0.023814273900, 0.034693878200, -0.060571265600, 0.293950616300, + 0.071902922400, -0.023127290700, 0.008965455900, -0.002267675400, + 
-0.001322245300, 0.003043324900, -0.002234895800, -0.001266776300}, + { -0.011540516700, 0.012843156400, -0.014239964400, 0.017653808300, + -0.023649793700, 0.034495386400, -0.060314068800, 0.294053570900, + 0.071604519400, -0.023084721700, 0.008966178100, -0.002274742400, + -0.001313420800, 0.003040255700, -0.002244873300, -0.001264793200}, + { -0.011472239700, 0.012762833200, -0.014142299300, 0.017523724800, + -0.023496551600, 0.034302863600, -0.060059835200, 0.294155332000, + 0.071318036600, -0.023035573000, 0.008958134900, -0.002285404200, + -0.001304581400, 0.003041364800, -0.002242376300, -0.001259366100}, + { -0.011396549400, 0.012678155200, -0.014036439400, 0.017396575200, + -0.023338009400, 0.034116321500, -0.059806163900, 0.294253594500, + 0.071016115000, -0.022981051800, 0.008953591700, -0.002294842900, + -0.001302187900, 0.003033306900, -0.002251340000, -0.001257327400}, + { -0.011323121700, 0.012586727200, -0.013934406600, 0.017269467600, + -0.023182283200, 0.033913734300, -0.059546703200, 0.294357625700, + 0.070729691900, -0.022931857800, 0.008949854300, -0.002293460500, + -0.001289637300, 0.003033725600, -0.002248666800, -0.001252011800}, + { -0.011252540100, 0.012503092200, -0.013827892600, 0.017136721300, + -0.023029828700, 0.033729073000, -0.059293518700, 0.294460157000, + 0.070434097200, -0.022879051900, 0.008946945700, -0.002305315700, + -0.001279931900, 0.003030359000, -0.002258640400, -0.001249868500}, + { -0.011182377900, 0.012419958100, -0.013722251500, 0.017009920100, + -0.022874644300, 0.033527611400, -0.059037642400, 0.294550501800, + 0.070143346800, -0.022826323700, 0.008936004500, -0.002312971700, + -0.001278495600, 0.003026834400, -0.002255007200, -0.001244616700}, + { -0.011112979600, 0.012337731100, -0.013617730300, 0.016882921900, + -0.022715294600, 0.033338099000, -0.058774188400, 0.294654140800, + 0.069848353600, -0.022774221100, 0.008938942400, -0.002318660300, + -0.001272367300, 0.003031143500, -0.002260491700, -0.001243631400}, 
+ { -0.011036494600, 0.012247285200, -0.013517704400, 0.016757477900, + -0.022556496100, 0.033144550900, -0.058521997300, 0.294752983200, + 0.069553175900, -0.022719357000, 0.008933910800, -0.002324464800, + -0.001256128400, 0.003023848100, -0.002262428500, -0.001237037300}, + { -0.010969358900, 0.012167754700, -0.013416137200, 0.016635331800, + -0.022407065400, 0.032953544400, -0.058263509000, 0.294849437700, + 0.069259760100, -0.022670102300, 0.008922281200, -0.002330427000, + -0.001256945700, 0.003023412200, -0.002266647300, -0.001236321600}, + { -0.010900887700, 0.012086623000, -0.013312472500, 0.016509174100, + -0.022247384700, 0.032758671600, -0.058008634000, 0.294944915100, + 0.068966542100, -0.022616489300, 0.008919626500, -0.002343674400, + -0.001245178500, 0.003017029900, -0.002268726500, -0.001229834600}, + { -0.010824042300, 0.012004282000, -0.013216314100, 0.016383459600, + -0.022105269700, 0.032570797200, -0.057751817000, 0.295047141500, + 0.068676416600, -0.022561101300, 0.008913972000, -0.002352971000, + -0.001238353400, 0.003021279300, -0.002274200700, -0.001228791200}, + { -0.010755431700, 0.011922721200, -0.013111568500, 0.016255565900, + -0.021943625600, 0.032372518000, -0.057486437700, 0.295142943400, + 0.068385169700, -0.022513896200, 0.008909637200, -0.002350338500, + -0.001230956100, 0.003009550000, -0.002274839200, -0.001222885000}, + { -0.010688812000, 0.011843615700, -0.013010433300, 0.016132334200, + -0.021787511700, 0.032185366300, -0.057225248100, 0.295234898600, + 0.068091568200, -0.022457194700, 0.008903658200, -0.002359880400, + -0.001223949400, 0.003013484800, -0.002280163100, -0.001221768100}, + { -0.010616490000, 0.011757803200, -0.012915294700, 0.016012759800, + -0.021638667600, 0.031988588100, -0.056969680800, 0.295335292700, + 0.067795719500, -0.022406367600, 0.008900068000, -0.002371512600, + -0.001214114500, 0.003010061800, -0.002290053700, -0.001219729000}, + { -0.010551509800, 0.011680400300, -0.012815991100, 
0.015891596000, + -0.021485136600, 0.031804218000, -0.056710441700, 0.295433201300, + 0.067505660900, -0.022347745200, 0.008890402300, -0.002371663200, + -0.001208867400, 0.003006011700, -0.002286436600, -0.001214616000}, + { -0.010484290500, 0.011600637200, -0.012712731400, 0.015760653300, + -0.021331865900, 0.031610777900, -0.056451724100, 0.295523834300, + 0.067206178800, -0.022295584100, 0.008886317900, -0.002383054500, + -0.001199058000, 0.003002379300, -0.002296230600, -0.001212436700}, + { -0.010418126800, 0.011517277400, -0.012619826700, 0.015643456400, + -0.021185866100, 0.031421091300, -0.056188447800, 0.295617466000, + 0.066918218000, -0.022237804200, 0.008878791700, -0.002390705100, + -0.001198036500, 0.002999278000, -0.002292788000, -0.001207424300}, + { -0.010353664500, 0.011440787900, -0.012520884000, 0.015521915600, + -0.021030519400, 0.031228731500, -0.055930893500, 0.295714067400, + 0.066622310400, -0.022183982500, 0.008871703300, -0.002394208300, + -0.001184119100, 0.002994763100, -0.002302348400, -0.001205321400}, + { -0.010290103500, 0.011365248300, -0.012423245400, 0.015401999100, + -0.020877200500, 0.031043216800, -0.055667568800, 0.295807177000, + 0.066328312100, -0.022131427200, 0.008866742900, -0.002404798200, + -0.001175466100, 0.002996071300, -0.002300011200, -0.001200034300}, + { -0.010212808300, 0.011286215600, -0.012331974000, 0.015285837100, + -0.020731638800, 0.030848876200, -0.055414282300, 0.295894792100, + 0.066040074000, -0.022070359700, 0.008854931500, -0.002403060700, + -0.001172180300, 0.002994947800, -0.002303988400, -0.001199255200}, + { -0.010148001100, 0.011208895700, -0.012231165800, 0.015157088900, + -0.020580030600, 0.030655950200, -0.055152663300, 0.295985303100, + 0.065747514100, -0.022018328800, 0.008851064800, -0.002415287100, + -0.001160745800, 0.002988510300, -0.002305832600, -0.001193029800}, + { -0.010085579200, 0.011134601100, -0.012134860100, 0.015038441400, + -0.020427698700, 0.030471019600, -0.054887446700, 
0.296075847400, + 0.065452832100, -0.021962597300, 0.008842662600, -0.002421544800, + -0.001161750800, 0.002988338000, -0.002310334400, -0.001192214700}, + { -0.010020900500, 0.011052809300, -0.012042691300, 0.014920355400, + -0.020274570600, 0.030279417600, -0.054626925700, 0.296172290700, + 0.065163770900, -0.021908764800, 0.008835761000, -0.002425750100, + -0.001146354300, 0.002981060500, -0.002311933800, -0.001186088400}, + { -0.009959891000, 0.010980172100, -0.011948361700, 0.014804636400, + -0.020129945500, 0.030089125800, -0.054357841800, 0.296258886800, + 0.064870816200, -0.021853498100, 0.008828516100, -0.002434510900, + -0.001139351300, 0.002984984200, -0.002317376800, -0.001185031900}, + { -0.009898260300, 0.010906814900, -0.011851836600, 0.014680546000, + -0.019983871100, 0.029902690900, -0.054105959400, 0.296352017200, + 0.064574678500, -0.021802440100, 0.008820504400, -0.002435747100, + -0.001132168000, 0.002973757000, -0.002317979100, -0.001179124900}, + { -0.009829464000, 0.010824029000, -0.011759300000, 0.014562038800, + -0.019829369800, 0.029708485400, -0.053839700800, 0.296436562600, + 0.064282383100, -0.021746613300, 0.008813206900, -0.002444614700, + -0.001125189300, 0.002977740500, -0.002323403700, -0.001178072500}, + { -0.009762586000, 0.010757748000, -0.011668677900, 0.014449006700, + -0.019682702400, 0.029529044900, -0.053581877800, 0.296526171200, + 0.063990447900, -0.021679886200, 0.008799692100, -0.002450355900, + -0.001116909400, 0.002966009600, -0.002323961500, -0.001171914500}, + { -0.009702145300, 0.010685674300, -0.011573598100, 0.014326849900, + -0.019542481200, 0.029334344500, -0.053317589600, 0.296618745800, + 0.063695013000, -0.021628522100, 0.008793030100, -0.002459004700, + -0.001110081400, 0.002970330900, -0.002329660800, -0.001170874500}, + { -0.009639674100, 0.010606181300, -0.011483842100, 0.014211122400, + -0.019391070400, 0.029143138800, -0.053052763700, 0.296709109200, + 0.063405648500, -0.021569686100, 0.008780917600, 
-0.002456863700, + -0.001107234800, 0.002969450300, -0.002333956200, -0.001170125300}, + { -0.009581441000, 0.010536858700, -0.011393027300, 0.014097865300, + -0.019243677500, 0.028962045800, -0.052791038800, 0.296794244500, + 0.063110931400, -0.021511634200, 0.008780017100, -0.002469118700, + -0.001096137300, 0.002963079500, -0.002335754800, -0.001163895700}, + { -0.009515365500, 0.010456770600, -0.011303346500, 0.013982426200, + -0.019092348700, 0.028770277400, -0.052523860000, 0.296881395600, + 0.062822494300, -0.021452275300, 0.008767593500, -0.002466860000, + -0.001093302800, 0.002962141700, -0.002339996800, -0.001163170100}, + { -0.009457035400, 0.010387174600, -0.011211408300, 0.013863735500, + -0.018955461900, 0.028578727400, -0.052264429900, 0.296965665500, + 0.062526684700, -0.021390879000, 0.008758231200, -0.002482874600, + -0.001081090900, 0.002955577100, -0.002341776700, -0.001156792500}, + { -0.009400556800, 0.010319669900, -0.011122203700, 0.013752383900, + -0.018810443800, 0.028400005500, -0.052003778300, 0.297056151300, + 0.062236110400, -0.021336172300, 0.008746430300, -0.002480260300, + -0.001078834400, 0.002954987600, -0.002346166500, -0.001156099600}, + { -0.009332105500, 0.010246624400, -0.011036161100, 0.013639306600, + -0.018660250400, 0.028208189500, -0.051734216700, 0.297140614200, + 0.061941420000, -0.021276152500, 0.008744611200, -0.002492352100, + -0.001067597300, 0.002948573300, -0.002347967100, -0.001149887200}, + { -0.009272360200, 0.010174446400, -0.010935538100, 0.013522491900, + -0.018517173300, 0.028021068000, -0.051473353600, 0.297221172300, + 0.061647632000, -0.021220051600, 0.008732236600, -0.002489611700, + -0.001065202900, 0.002947918700, -0.002352278400, -0.001149066000}, + { -0.009213325600, 0.010098901500, -0.010850127500, 0.013411298100, + -0.018369198800, 0.027831555500, -0.051205897900, 0.297312718800, + 0.061355453400, -0.021156852500, 0.008721014300, -0.002503786900, + -0.001055769400, 0.002949420200, 
-0.002350196200, -0.001143817000}, + { -0.009154686400, 0.010032437200, -0.010762989400, 0.013303147900, + -0.018232338700, 0.027647756700, -0.050939241800, 0.297396279600, + 0.061067211800, -0.021101691400, 0.008709501200, -0.002502402100, + -0.001050877900, 0.002940699400, -0.002358437400, -0.001142165600}, + { -0.009098345900, 0.009964812300, -0.010672807300, 0.013184994200, + -0.018090535100, 0.027462057700, -0.050678910500, 0.297482132700, + 0.060775711600, -0.021036808600, 0.008695284800, -0.002507997500, + -0.001037794300, 0.002941289900, -0.002355925500, -0.001136940000}, + { -0.009040073800, 0.009890164700, -0.010588375400, 0.013074472100, + -0.017943142100, 0.027273058500, -0.050413365200, 0.297557115100, + 0.060482529200, -0.020973444000, 0.008689737800, -0.002515641700, + -0.001036539700, 0.002933528900, -0.002364533600, -0.001134965700}, + { -0.008978175500, 0.009829858100, -0.010503877000, 0.012966752800, + -0.017800540400, 0.027094554000, -0.050147624100, 0.297640606500, + 0.060185287000, -0.020914048900, 0.008683823800, -0.002517293800, + -0.001024611700, 0.002934410200, -0.002362132000, -0.001129829700}, + { -0.008920174300, 0.009755024700, -0.010418098600, 0.012849997300, + -0.017658755900, 0.026908098600, -0.049883464500, 0.297721129900, + 0.059897161500, -0.020855847900, 0.008665084700, -0.002525885700, + -0.001025446300, 0.002934650000, -0.002366817000, -0.001128974000}, + { -0.008866407500, 0.009690722000, -0.010331978800, 0.012740654600, + -0.017513336800, 0.026720176000, -0.049613614700, 0.297814833400, + 0.059603377300, -0.020796619900, 0.008659451000, -0.002528098100, + -0.001012468200, 0.002930143500, -0.002376553600, -0.001126794900}, + { -0.008809499600, 0.009626161500, -0.010246944100, 0.012634266200, + -0.017376589500, 0.026529402400, -0.049347375400, 0.297893741900, + 0.059312998000, -0.020728754500, 0.008642272200, -0.002530568000, + -0.001007501500, 0.002926808400, -0.002372946000, -0.001121930300}, + { -0.008753278600, 
0.009553595700, -0.010164038100, 0.012520217600, + -0.017238147400, 0.026350960600, -0.049076921100, 0.297970747600, + 0.059018701600, -0.020669669800, 0.008638257800, -0.002540198000, + -0.000998529900, 0.002922998700, -0.002382694100, -0.001119665800}, + { -0.008700892600, 0.009490588100, -0.010079579700, 0.012413140100, + -0.017095016200, 0.026165585800, -0.048812541600, 0.298055673800, + 0.058732787100, -0.020602200500, 0.008620998100, -0.002542420300, + -0.000993979200, 0.002919927800, -0.002379174400, -0.001114931100}, + { -0.008636872000, 0.009421764400, -0.009992841500, 0.012312002500, + -0.016954362300, 0.025981636800, -0.048553232900, 0.298128451900, + 0.058438067100, -0.020540206700, 0.008612798400, -0.002542224400, + -0.000983932400, 0.002923485400, -0.002384780000, -0.001113605200}, + { -0.008584316600, 0.009358594600, -0.009907251900, 0.012198085900, + -0.016816027500, 0.025797833300, -0.048288066300, 0.298212478700, + 0.058145551600, -0.020476810000, 0.008597199400, -0.002545865100, + -0.000977036900, 0.002912420300, -0.002385153400, -0.001107825500}, + { -0.008530542400, 0.009288917500, -0.009827928200, 0.012092889000, + -0.016673561700, 0.025612282700, -0.048021033600, 0.298294423300, + 0.057854601300, -0.020415253900, 0.008590004000, -0.002551352000, + -0.000978876700, 0.002913287000, -0.002390253900, -0.001106899300}, + { -0.008478483800, 0.009226377800, -0.009743311000, 0.011980096900, + -0.016536709400, 0.025434302500, -0.047747110700, 0.298365825000, + 0.057560244000, -0.020351243300, 0.008574970900, -0.002557026600, + -0.000963826400, 0.002905602300, -0.002391414300, -0.001100755900}, + { -0.008424799700, 0.009165535000, -0.009662092200, 0.011877441200, + -0.016403414400, 0.025245702300, -0.047478375000, 0.298446320200, + 0.057267599400, -0.020285138500, 0.008556551400, -0.002557753000, + -0.000960917900, 0.002905172800, -0.002396213700, -0.001099661700}, + { -0.008372136900, 0.009097353100, -0.009584221800, 0.011773596800, + -0.016261962900, 
0.025060099400, -0.047209041900, 0.298525257700, + 0.056970953100, -0.020227896300, 0.008551918400, -0.002566762900, + -0.000953100600, 0.002907384900, -0.002394239600, -0.001094674000}, + { -0.008313510700, 0.009040315200, -0.009502392800, 0.011662794000, + -0.016126306100, 0.024878475500, -0.046947568800, 0.298599300000, + 0.056680564700, -0.020162388700, 0.008534249200, -0.002568479900, + -0.000948207200, 0.002898882600, -0.002402600100, -0.001092599200}, + { -0.008261904500, 0.008972850800, -0.009425383800, 0.011560009200, + -0.015986171300, 0.024693755800, -0.046677919500, 0.298682066000, + 0.056395501800, -0.020100315900, 0.008526039100, -0.002568721700, + -0.000936969600, 0.002900157200, -0.002400214400, -0.001087840400}, + { -0.008211115400, 0.008911303700, -0.009336453400, 0.011454721200, + -0.015854163900, 0.024519846800, -0.046405144200, 0.298759763600, + 0.056094866900, -0.020029308800, 0.008512218400, -0.002571374100, + -0.000931811500, 0.002891576300, -0.002408972300, -0.001085440200}, + { -0.008160197200, 0.008844778800, -0.009260656400, 0.011353693700, + -0.015715846500, 0.024337335900, -0.046141044500, 0.298830136900, + 0.055805182500, -0.019961847400, 0.008492748700, -0.002571357000, + -0.000929480500, 0.002891307200, -0.002413690800, -0.001084311600}, + { -0.008112103700, 0.008787159000, -0.009181661000, 0.011251768000, + -0.015576602500, 0.024153171400, -0.045870162900, 0.298911585300, + 0.055512440700, -0.019901475900, 0.008478936300, -0.002583378800, + -0.000920943200, 0.002893394800, -0.002411773500, -0.001079321400}, + { -0.008048876900, 0.008722251100, -0.009104537200, 0.011142744200, + -0.015441580200, 0.023970548200, -0.045602981800, 0.298977325900, + 0.055225670900, -0.019836708300, 0.008468413700, -0.002580907300, + -0.000917271300, 0.002885109500, -0.002420012800, -0.001077376400}, + { -0.008001667100, 0.008665715200, -0.009027145700, 0.011042483200, + -0.015304227600, 0.023788158500, -0.045332341000, 0.299058734900, + 0.054929607400, 
-0.019764945900, 0.008453838100, -0.002582942100, + -0.000913301400, 0.002882721100, -0.002416975800, -0.001072495600}, + { -0.007952339000, 0.008600802500, -0.008952392000, 0.010937108800, + -0.015174084700, 0.023616380100, -0.045064497900, 0.299132742800, + 0.054637916200, -0.019701857900, 0.008436057700, -0.002584785900, + -0.000903092500, 0.002886222600, -0.002422944100, -0.001071045600}, + { -0.007904789100, 0.008543139300, -0.008868051200, 0.010841905100, + -0.015039485700, 0.023436300800, -0.044799707800, 0.299207159000, + 0.054344370000, -0.019630506300, 0.008421815400, -0.002587760600, + -0.000897083200, 0.002875630100, -0.002423263600, -0.001065244000}, + { -0.007858185900, 0.008487210700, -0.008790859100, 0.010741407000, + -0.014901532300, 0.023251997700, -0.044524238300, 0.299282071600, + 0.054053355000, -0.019568733800, 0.008411993400, -0.002586331800, + -0.000887586200, 0.002879382200, -0.002429340400, -0.001063908000}, + { -0.007801564500, 0.008427026200, -0.008718424600, 0.010638181300, + -0.014777689900, 0.023068158700, -0.044255915700, 0.299351847700, + 0.053763206500, -0.019505511500, 0.008395133400, -0.002594827000, + -0.000887696700, 0.002878112600, -0.002426308700, -0.001059278600}, + { -0.007752716200, 0.008371567300, -0.008642947600, 0.010540183700, + -0.014642563600, 0.022887494700, -0.043988296500, 0.299422575400, + 0.053470206400, -0.019432499100, 0.008379552700, -0.002596486400, + -0.000883216200, 0.002869839900, -0.002434904100, -0.001056944400}, + { -0.007704528800, 0.008307898100, -0.008569545100, 0.010435528000, + -0.014511589100, 0.022707641400, -0.043720040800, 0.299491463500, + 0.053178429700, -0.019359975400, 0.008364846400, -0.002599735200, + -0.000871418100, 0.002871064200, -0.002432661800, -0.001051839000}, + { -0.007660450500, 0.008255099600, -0.008496004600, 0.010339539800, + -0.014379049200, 0.022534686200, -0.043441724600, 0.299571471100, + 0.052882824900, -0.019298671600, 0.008346423900, -0.002600374000, + 
-0.000867444000, 0.002862933200, -0.002441463500, -0.001049660900}, + { -0.007603296800, 0.008193382200, -0.008416132400, 0.010239585900, + -0.014249993700, 0.022355720200, -0.043172229900, 0.299637947900, + 0.052591247500, -0.019224621200, 0.008329727400, -0.002600981100, + -0.000864181800, 0.002860802800, -0.002438248600, -0.001044875200}, + { -0.007559512700, 0.008140629300, -0.008342511600, 0.010143265600, + -0.014116237900, 0.022175330800, -0.042902559300, 0.299710502400, + 0.052305586800, -0.019159837900, 0.008310446500, -0.002602041500, + -0.000854285500, 0.002864501800, -0.002444344500, -0.001043455200}, + { -0.007513280300, 0.008079345200, -0.008271562400, 0.010041094600, + -0.013987692600, 0.021997806500, -0.042634176100, 0.299784559000, + 0.052010247800, -0.019090033000, 0.008295552500, -0.002604044700, + -0.000848986600, 0.002854305800, -0.002444743300, -0.001037730800}, + { -0.007466532300, 0.008026496300, -0.008198836000, 0.009945759200, + -0.013854742200, 0.021817938400, -0.042362922600, 0.299849767900, + 0.051710800600, -0.019017700500, 0.008278482500, -0.002603834000, + -0.000847135700, 0.002854590200, -0.002450114700, -0.001036184000}, + { -0.007421596100, 0.007966615900, -0.008130314500, 0.009852191300, + -0.013722881700, 0.021638799100, -0.042091828500, 0.299919862100, + 0.051423553400, -0.018942531200, 0.008260465800, -0.002603155800, + -0.000845516700, 0.002854963900, -0.002455432800, -0.001034829400}, + { -0.007368637400, 0.007915606500, -0.008049069000, 0.009753117800, + -0.013595099000, 0.021460165700, -0.041819414400, 0.299987943200, + 0.051132245200, -0.018881182900, 0.008242689200, -0.002605985800, + -0.000832295700, 0.002848095500, -0.002456559500, -0.001029015400}, + { -0.007324914900, 0.007857386900, -0.007982526900, 0.009661947700, + -0.013466505100, 0.021290236500, -0.041544493000, 0.300056546100, + 0.050837682400, -0.018808660100, 0.008224995500, -0.002605154700, + -0.000830876200, 0.002848515400, -0.002462055200, -0.001027526900}, 
+ { -0.007282763500, 0.007806632000, -0.007910621800, 0.009561255200, + -0.013339966800, 0.021114682600, -0.041280619200, 0.300121126000, + 0.050544717400, -0.018736947600, 0.008207999600, -0.002605038900, + -0.000828329300, 0.002846963800, -0.002458999000, -0.001022805800}, + { -0.007239595400, 0.007748911800, -0.007844275500, 0.009469910200, + -0.013210161600, 0.020936736300, -0.041008289300, 0.300195390400, + 0.050254997100, -0.018666130300, 0.008192356500, -0.002607611600, + -0.000816356100, 0.002842431500, -0.002468794900, -0.001020138600}, + { -0.007189132600, 0.007701593500, -0.007773543100, 0.009369503200, + -0.013083002200, 0.020758957600, -0.040734108300, 0.300260370700, + 0.049961784800, -0.018593391800, 0.008174805900, -0.002607328400, + -0.000813752200, 0.002840812400, -0.002465827800, -0.001015410900}, + { -0.007142203300, 0.007641451600, -0.007699103200, 0.009282076700, + -0.012954925100, 0.020581902400, -0.040460854200, 0.300332528600, + 0.049672014200, -0.018520619000, 0.008156499100, -0.002606187400, + -0.000812553500, 0.002841559200, -0.002471464900, -0.001013982200}, + { -0.007101149600, 0.007591957300, -0.007628580400, 0.009182604800, + -0.012829151000, 0.020406004000, -0.040192877300, 0.300391149900, + 0.049381202700, -0.018448182600, 0.008139326600, -0.002606745200, + -0.000808105100, 0.002831584800, -0.002471553400, -0.001008355200}, + { -0.007059152000, 0.007535511600, -0.007563628700, 0.009092708900, + -0.012700466400, 0.020228151600, -0.039917450300, 0.300460428800, + 0.049092597200, -0.018375427900, 0.008121814200, -0.002607487200, + -0.000798497500, 0.002835512200, -0.002478145100, -0.001006580300}, + { -0.007009147900, 0.007482806700, -0.007499733800, 0.008997547100, + -0.012579180400, 0.020062891600, -0.039647806700, 0.300525821900, + 0.048797495500, -0.018306252700, 0.008105184800, -0.002607855100, + -0.000794439900, 0.002825789700, -0.002478474500, -0.001000945500}, + { -0.006968240600, 0.007433072000, -0.007423567800, 
0.008910574100, + -0.012451522700, 0.019885022200, -0.039369675200, 0.300585104500, + 0.048504934800, -0.018229228000, 0.008077029200, -0.002609247300, + -0.000792419400, 0.002826100200, -0.002484009500, -0.000999231800}, + { -0.006926866100, 0.007377316500, -0.007358544200, 0.008814417700, + -0.012328366300, 0.019710850400, -0.039101469100, 0.300648793200, + 0.048210544400, -0.018159587600, 0.008060409500, -0.002610430800, + -0.000781936000, 0.002828146400, -0.002482150700, -0.000994223400}, + { -0.006888043700, 0.007330615400, -0.007291803100, 0.008724921800, + -0.012200346800, 0.019533132300, -0.038822537300, 0.300712721600, + 0.047923413800, -0.018085444700, 0.008041528500, -0.002609670800, + -0.000778976000, 0.002820404900, -0.002490903500, -0.000991790800}, + { -0.006839293900, 0.007279247200, -0.007229475600, 0.008630964100, + -0.012079179200, 0.019361129300, -0.038555831700, 0.300783801700, + 0.047630120500, -0.018006655200, 0.008026572200, -0.002609399200, + -0.000777006200, 0.002819585000, -0.002488366700, -0.000987006200}, + { -0.006798540800, 0.007233149500, -0.007164042200, 0.008543197100, + -0.011953311100, 0.019186232700, -0.038285177900, 0.300844278100, + 0.047335766700, -0.017934700400, 0.008007388400, -0.002607418500, + -0.000776398500, 0.002820679000, -0.002494154700, -0.000985371100}, + { -0.006756891400, 0.007176317300, -0.007091540900, 0.008451307000, + -0.011831617600, 0.019011206000, -0.038005599900, 0.300906624700, + 0.047040567000, -0.017860728900, 0.007980042300, -0.002609428000, + -0.000772729700, 0.002812783500, -0.002503047600, -0.000982640700}, + { -0.006720561000, 0.007132703000, -0.007028371700, 0.008366076000, + -0.011709400800, 0.018846008900, -0.037732212300, 0.300971836600, + 0.046750583700, -0.017789132800, 0.007960205400, -0.002601193300, + -0.000759764200, 0.002814043200, -0.002500954000, -0.000977775800}, + { -0.006672404900, 0.007082025300, -0.006966950500, 0.008272917200, + -0.011588469400, 0.018672889900, -0.037460064500, 
0.301029284000, + 0.046455528400, -0.017707594600, 0.007943302900, -0.002599558300, + -0.000759066900, 0.002815167300, -0.002506921900, -0.000975884100}, + { -0.006636191300, 0.007038598500, -0.006903834600, 0.008187619300, + -0.011464768800, 0.018499388000, -0.037187988000, 0.301092874000, + 0.046165570800, -0.017634207100, 0.007916113200, -0.002601855100, + -0.000754783900, 0.002805563000, -0.002507180900, -0.000970271500}, + { -0.006596465200, 0.006983825400, -0.006833460100, 0.008097770300, + -0.011345476800, 0.018327200500, -0.036915500100, 0.301155082000, + 0.045876647100, -0.017561716500, 0.007895994400, -0.002599054600, + -0.000754654000, 0.002806943500, -0.002513137500, -0.000968652300}, + { -0.006560412300, 0.006940658100, -0.006770837100, 0.008012401600, + -0.011221145500, 0.018151488300, -0.036633262700, 0.301219274700, + 0.045585787000, -0.017480905500, 0.007880162200, -0.002600023100, + -0.000744270100, 0.002809097600, -0.002511606900, -0.000963551800}, + { -0.006513718100, 0.006891483500, -0.006710841700, 0.007921089700, + -0.011101909300, 0.017979361500, -0.036359405400, 0.301279455500, + 0.045296336600, -0.017405665400, 0.007850982300, -0.002599865500, + -0.000743330300, 0.002810197300, -0.002517572500, -0.000961768900}, + { -0.006474304300, 0.006840684600, -0.006652924700, 0.007839125400, + -0.010981524900, 0.017809064300, -0.036093943200, 0.301337182700, + 0.044998869000, -0.017326384300, 0.007833242200, -0.002591035300, + -0.000737232300, 0.002800277200, -0.002517691400, -0.000956035500}, + { -0.006438009700, 0.006796203400, -0.006581589800, 0.007750585800, + -0.010864711900, 0.017645387000, -0.035815202400, 0.301393202700, + 0.044711874800, -0.017252787500, 0.007812074200, -0.002587518900, + -0.000737454900, 0.002801635500, -0.002523680400, -0.000954362300}, + { -0.006401639200, 0.006746574400, -0.006524338300, 0.007669196200, + -0.010743983400, 0.017473398600, -0.035540053600, 0.301451854500, + 0.044413545900, -0.017171090300, 0.007786312500, 
-0.002588925400, + -0.000735266600, 0.002800927700, -0.002521316700, -0.000949359500}, + { -0.006357778700, 0.006707002900, -0.006462535400, 0.007577627400, + -0.010624650100, 0.017300143900, -0.035261474700, 0.301505220000, + 0.044127692000, -0.017097850700, 0.007766585900, -0.002588324800, + -0.000724851500, 0.002796544700, -0.002531073000, -0.000946421000}, + { -0.006322346100, 0.006658344700, -0.006406316800, 0.007497394200, + -0.010505114500, 0.017129209400, -0.034986501900, 0.301569791200, + 0.043833061900, -0.017016393700, 0.007740582000, -0.002589426200, + -0.000722919300, 0.002796070900, -0.002528823500, -0.000941518100}, + { -0.006287412000, 0.006615790600, -0.006337047100, 0.007410503600, + -0.010389329800, 0.016959520500, -0.034711193400, 0.301633352100, + 0.043538995400, -0.016935463200, 0.007720653400, -0.002578087200, + -0.000720236600, 0.002796975700, -0.002534998100, -0.000939577700}, + { -0.006252289300, 0.006567502700, -0.006281032900, 0.007330632700, + -0.010270201400, 0.016788863600, -0.034440183200, 0.301682214200, + 0.043245651300, -0.016861993700, 0.007691723300, -0.002578831000, + -0.000716968300, 0.002787827800, -0.002535183000, -0.000933911800}, + { -0.006205898400, 0.006520927200, -0.006224969100, 0.007243536900, + -0.010155742500, 0.016627163700, -0.034159645200, 0.301741305700, + 0.042953309700, -0.016781259600, 0.007673144100, -0.002575750000, + -0.000717300000, 0.002789556600, -0.002541677100, -0.000931902700}, + { -0.006172693900, 0.006480627500, -0.006158957400, 0.007167086900, + -0.010038799500, 0.016458330300, -0.033890060300, 0.301797671500, + 0.042661393000, -0.016699057600, 0.007646012100, -0.002576064600, + -0.000715860200, 0.002789348200, -0.002539353500, -0.000927029000}, + { -0.006137812800, 0.006432255400, -0.006101996500, 0.007079130900, + -0.009922951400, 0.016287909800, -0.033610088700, 0.301854227800, + 0.042371265900, -0.016625851600, 0.007623115300, -0.002564257200, + -0.000711981100, 0.002781648600, 
-0.002548160800, -0.000924334400}, + { -0.006096934600, 0.006396695800, -0.006045818500, 0.007000329800, + -0.009804671800, 0.016116562300, -0.033329631800, 0.301910604800, + 0.042079113900, -0.016542771000, 0.007595979900, -0.002565562900, + -0.000702731900, 0.002785897700, -0.002555562000, -0.000921807000}, + { -0.006062933900, 0.006349328800, -0.005990311300, 0.006913942100, + -0.009690267000, 0.015948108300, -0.033057262400, 0.301962237800, + 0.041789551900, -0.016461435000, 0.007576990700, -0.002562617900, + -0.000702405400, 0.002786094000, -0.002553292100, -0.000917048000}, + { -0.006030707600, 0.006310258300, -0.005925655500, 0.006838885700, + -0.009574791300, 0.015779698800, -0.032784352400, 0.302013593200, + 0.041498813500, -0.016377089100, 0.007547123000, -0.002553821800, + -0.000697783700, 0.002778180000, -0.002562081100, -0.000914005100}, + { -0.005997784800, 0.006264360200, -0.005871819500, 0.006754676200, + -0.009463128500, 0.015620155600, -0.032502060800, 0.302075447000, + 0.041202514700, -0.016296539100, 0.007519854200, -0.002553495700, + -0.000696753300, 0.002778219800, -0.002560158500, -0.000909073800}, + { -0.005953802600, 0.006220559900, -0.005819558300, 0.006678209600, + -0.009346907400, 0.015450858700, -0.032226770800, 0.302123085100, + 0.040913759200, -0.016213682700, 0.007499502500, -0.002549585000, + -0.000697575600, 0.002780297700, -0.002566684400, -0.000906938700}, + { -0.005922781400, 0.006183472900, -0.005762778500, 0.006591765300, + -0.009226118300, 0.015287200700, -0.031955488100, 0.302181358200, + 0.040618118300, -0.016132038300, 0.007470292100, -0.002540354300, + -0.000694252800, 0.002779793900, -0.002564399000, -0.000902066000}, + { -0.005890136500, 0.006137124600, -0.005702277700, 0.006519185100, + -0.009112249200, 0.015119563200, -0.031680564700, 0.302235443400, + 0.040332527300, -0.016048205900, 0.007441443200, -0.002539675700, + -0.000692720800, 0.002772999400, -0.002573622600, -0.000899005400}, + { -0.005850311400, 
0.006102867800, -0.005646959300, 0.006433987600, + -0.008998513600, 0.014949546000, -0.031394934700, 0.302282936600, + 0.040036207000, -0.015967839100, 0.007422582300, -0.002538011700, + -0.000683794500, 0.002775953500, -0.002572520300, -0.000893764400}, + { -0.005818964100, 0.006058727200, -0.005595185800, 0.006352081900, + -0.008889397700, 0.014792369000, -0.031117719900, 0.302336098600, + 0.039742056100, -0.015884752000, 0.007391840300, -0.002527331400, + -0.000681709200, 0.002777178000, -0.002579042300, -0.000891447000}, + { -0.005787145500, 0.006013604000, -0.005535923100, 0.006280742400, + -0.008776679900, 0.014625338400, -0.030841085500, 0.302387399600, + 0.039449376300, -0.015802899000, 0.007363441100, -0.002527024400, + -0.000679473400, 0.002768808300, -0.002579613300, -0.000885670100}, + { -0.005757587300, 0.005978394700, -0.005481519600, 0.006197183700, + -0.008665183400, 0.014458560600, -0.030564097400, 0.302437909100, + 0.039157112000, -0.015721824200, 0.007342861000, -0.002522537800, + -0.000680774600, 0.002771330800, -0.002586549000, -0.000883371200}, + { -0.005718682200, 0.005938417900, -0.005432964900, 0.006124864000, + -0.008553225900, 0.014292583100, -0.030288050400, 0.302496054000, + 0.038866667200, -0.015638780000, 0.007312074800, -0.002512230800, + -0.000678044400, 0.002771222400, -0.002584426900, -0.000878577700}, + { -0.005689051600, 0.005902462900, -0.005370854200, 0.006045352000, + -0.008444322900, 0.014128120100, -0.030018298000, 0.302541430200, + 0.038575785600, -0.015556232000, 0.007282648600, -0.002510659300, + -0.000677160100, 0.002764759200, -0.002593729700, -0.000875301600}, + { -0.005657037700, 0.005859684300, -0.005321845400, 0.005973581100, + -0.008333790400, 0.013970133000, -0.029736435400, 0.302587106700, + 0.038284268000, -0.015471874400, 0.007250951700, -0.002499759200, + -0.000674653700, 0.002764455200, -0.002591465700, -0.000870433100}, + { -0.005618333200, 0.005819685000, -0.005272906200, 0.005893724000, + -0.008225058800, 
0.013805185000, -0.029458517200, 0.302642069200, + 0.037993797900, -0.015379756300, 0.007223193600, -0.002497981300, + -0.000675186800, 0.002766900000, -0.002598670200, -0.000867828800}, + { -0.005590368200, 0.005786648100, -0.005221758100, 0.005820627000, + -0.008112433600, 0.013637747600, -0.029176984600, 0.302685277400, + 0.037704094600, -0.015297293000, 0.007201598200, -0.002493386100, + -0.000675974400, 0.002767923100, -0.002596834100, -0.000863002200}, + { -0.005560237100, 0.005743450200, -0.005163726600, 0.005743489200, + -0.008005370600, 0.013473662100, -0.028897800000, 0.302738295200, + 0.037407381800, -0.015215522800, 0.007170544900, -0.002482718300, + -0.000672918600, 0.002760875300, -0.002606209100, -0.000859635800}, + { -0.005523266400, 0.005705570200, -0.005118090200, 0.005674706600, + -0.007897426600, 0.013318487200, -0.028623546600, 0.302785848400, + 0.037119392900, -0.015122696500, 0.007141770500, -0.002480246400, + -0.000673963700, 0.002763436900, -0.002613369900, -0.000856978400}, + { -0.005495962500, 0.005673345500, -0.005067111000, 0.005594863000, + -0.007789506200, 0.013153651000, -0.028342627700, 0.302836038100, + 0.036823681900, -0.015040742100, 0.007110968900, -0.002470535200, + -0.000662984200, 0.002766079600, -0.002612329100, -0.000851700900}, + { -0.005466715000, 0.005630853800, -0.005009907900, 0.005518700600, + -0.007683264800, 0.012990451100, -0.028068772200, 0.302880652200, + 0.036538564300, -0.014956855300, 0.007079958900, -0.002467783200, + -0.000662808200, 0.002759921300, -0.002621758400, -0.000848374500}, + { -0.005431440200, 0.005602376800, -0.004962376800, 0.005449141300, + -0.007574066800, 0.012825587300, -0.027786579600, 0.302928958400, + 0.036241695300, -0.014864692500, 0.007050007700, -0.002456982100, + -0.000660605900, 0.002760230300, -0.002620023800, -0.000843220100}, + { -0.005403790100, 0.005562631500, -0.004915494200, 0.005372340600, + -0.007469387600, 0.012671612300, -0.027509888900, 0.302972042600, + 0.035948446100, 
-0.014782113000, 0.007018769500, -0.002453253700, + -0.000662228600, 0.002763058500, -0.002627331400, -0.000840529300}, + { -0.005375829600, 0.005521878400, -0.004861076400, 0.005306264800, + -0.007362011400, 0.012507686500, -0.027226932800, 0.303024793500, + 0.035664170800, -0.014687960800, 0.006987700700, -0.002441798600, + -0.000660398000, 0.002763550700, -0.002625576200, -0.000835595400}, + { -0.005338761500, 0.005492759000, -0.004812448700, 0.005228310500, + -0.007255591100, 0.012344296500, -0.026950110700, 0.303065098100, + 0.035372382500, -0.014606652200, 0.006965471700, -0.002436456100, + -0.000661304500, 0.002757890500, -0.002635253900, -0.000832149000}, + { -0.005312372600, 0.005454664900, -0.004768172600, 0.005161314600, + -0.007148973700, 0.012182095400, -0.026675115000, 0.303114481500, + 0.035080728500, -0.014514347800, 0.006934810400, -0.002424890300, + -0.000659803100, 0.002758707300, -0.002633626200, -0.000827077700}, + { -0.005284879700, 0.005414457900, -0.004713545300, 0.005088017800, + -0.007046166400, 0.012028731400, -0.026395378600, 0.303152032300, + 0.034789552300, -0.014430720500, 0.006902452300, -0.002420492700, + -0.000661736600, 0.002761659300, -0.002641015800, -0.000824295400}, + { -0.005260165300, 0.005385875600, -0.004667616900, 0.005020650000, + -0.006939263300, 0.011865377600, -0.026110236100, 0.303202188800, + 0.034489243800, -0.014340074900, 0.006872439200, -0.002409717000, + -0.000658712000, 0.002753575300, -0.002641882000, -0.000818255300}, + { -0.005224926800, 0.005349893100, -0.004623184600, 0.004945699400, + -0.006835218200, 0.011703557800, -0.025832407000, 0.303246711500, + 0.034200612800, -0.014256420100, 0.006840045300, -0.002405245800, + -0.000660759200, 0.002756849600, -0.002649463500, -0.000815493900}, + { -0.005200360700, 0.005320551900, -0.004569322900, 0.004882070200, + -0.006731633300, 0.011550770700, -0.025552150000, 0.303289891500, + 0.033911264900, -0.014163218900, 0.006808149800, -0.002392746100, + 
-0.000659822800, 0.002757788100, -0.002647879500, -0.000810410400}, + { -0.005174599300, 0.005283024100, -0.004525112200, 0.004808058900, + -0.006628580100, 0.011389660100, -0.025273538400, 0.303332931800, + 0.033621534300, -0.014068603800, 0.006768397200, -0.002390857100, + -0.000660917000, 0.002760752200, -0.002655561700, -0.000807307300}, + { -0.005138134800, 0.005247478500, -0.004481928500, 0.004734510300, + -0.006526058700, 0.011228843600, -0.024994140700, 0.303374972100, + 0.033324125700, -0.013977140000, 0.006737428400, -0.002379104500, + -0.000658573800, 0.002752971400, -0.002656330700, -0.000801279400}, + { -0.005114783000, 0.005220209500, -0.004437547500, 0.004669293700, + -0.006421881700, 0.011075401800, -0.024710885300, 0.303413510200, + 0.033037596300, -0.013892349300, 0.006703786000, -0.002373851700, + -0.000661054500, 0.002756297000, -0.002663989900, -0.000798421400}, + { -0.005089678500, 0.005182927100, -0.004386048500, 0.004598983100, + -0.006321772900, 0.010917290800, -0.024440923300, 0.303460194300, + 0.032743987500, -0.013801224500, 0.006672169900, -0.002360988600, + -0.000660639200, 0.002757935000, -0.002662733600, -0.000793256200}, + { -0.005056179700, 0.005149409700, -0.004345436200, 0.004535552600, + -0.006218141400, 0.010756094800, -0.024159306200, 0.303498774900, + 0.032447376800, -0.013708092900, 0.006639524200, -0.002347647400, + -0.000660699900, 0.002760669100, -0.002670435300, -0.000790060900}, + { -0.005033354700, 0.005122582000, -0.004301162100, 0.004462875900, + -0.006117430000, 0.010604498100, -0.023875417200, 0.303542329100, + 0.032163622800, -0.013614934700, 0.006608303400, -0.002343698500, + -0.000661193300, 0.002753849700, -0.002671460600, -0.000784186200}, + { -0.005008967600, 0.005086442900, -0.004251580600, 0.004402065400, + -0.006015436800, 0.010444472000, -0.023593760200, 0.303587200300, + 0.031870307600, -0.013522135600, 0.006575368200, -0.002330048900, + -0.000661553700, 0.002756838700, -0.002679339200, -0.000781019600}, 
+ { -0.004976115300, 0.005053314100, -0.004210712400, 0.004331218000, + -0.005916048600, 0.010294044300, -0.023316437000, 0.303617696900, + 0.031578389200, -0.013438640200, 0.006541662700, -0.002324484400, + -0.000663934100, 0.002759243400, -0.002678215700, -0.000775874300}, + { -0.004954054600, 0.005027973800, -0.004168660700, 0.004268203300, + -0.005813434400, 0.010133649800, -0.023032853100, 0.303659766600, + 0.031285977700, -0.013345456800, 0.006508681700, -0.002311137900, + -0.000663161200, 0.002753279200, -0.002688143900, -0.000771897700}, + { -0.004928421800, 0.004991603200, -0.004118619000, 0.004199827500, + -0.005715080300, 0.009976047300, -0.022758266200, 0.303698508100, + 0.030994692400, -0.013251271900, 0.006466426100, -0.002299129500, + -0.000662665200, 0.002754921900, -0.002686928900, -0.000766591100}, + { -0.004896598000, 0.004959857600, -0.004080198200, 0.004139136400, + -0.005614528900, 0.009824735600, -0.022471040100, 0.303736794400, + 0.030703785700, -0.013157852400, 0.006433708500, -0.002293624700, + -0.000665508700, 0.002758724400, -0.002695207900, -0.000763249100}, + { -0.004875341600, 0.004935181400, -0.004038259800, 0.004068687500, + -0.005515687900, 0.009667243000, -0.022195988700, 0.303781885400, + 0.030415689700, -0.013065059600, 0.006400355600, -0.002279366500, + -0.000666054000, 0.002760975500, -0.002694258700, -0.000758084800}, + { -0.004852436300, 0.004901022400, -0.003990905800, 0.004010497500, + -0.005416881700, 0.009517056100, -0.021907914800, 0.303818672700, + 0.030116858700, -0.012972784600, 0.006366843000, -0.002265346200, + -0.000665839200, 0.002755157700, -0.002704376900, -0.000753869400}, + { -0.004820883700, 0.004869373400, -0.003952016200, 0.003941798300, + -0.005318693300, 0.009359170900, -0.021629843300, 0.303851724000, + 0.029827673100, -0.012877926700, 0.006324791000, -0.002261575500, + -0.000667691900, 0.002757747500, -0.002703558700, -0.000748453500}, + { -0.004800157600, 0.004845423900, -0.003911010800, 
0.003872318000, + -0.005220680700, 0.009201932100, -0.021352478200, 0.303893044500, + 0.029540755700, -0.012784037700, 0.006290380500, -0.002246404800, + -0.000669147800, 0.002761387900, -0.002711729700, -0.000745150400}, + { -0.004769350200, 0.004814396600, -0.003865951000, 0.003816203000, + -0.005123937600, 0.009054063500, -0.021072898700, 0.303933574300, + 0.029244511900, -0.012682582100, 0.006257946300, -0.002231939800, + -0.000670457700, 0.002764997300, -0.002720193100, -0.000741467200}, + { -0.004747416400, 0.004781502000, -0.003827110500, 0.003747768000, + -0.005026132300, 0.008896179300, -0.020792099600, 0.303962072700, + 0.028957720600, -0.012588365800, 0.006224450200, -0.002226292500, + -0.000672109200, 0.002758959300, -0.002721352900, -0.000735386100}, + { -0.004727789800, 0.004759315900, -0.003788813700, 0.003689348800, + -0.004928785800, 0.008748066500, -0.020511012600, 0.304000013700, + 0.028662906200, -0.012494169800, 0.006180633900, -0.002212588700, + -0.000673225600, 0.002762428000, -0.002729709100, -0.000731741100}, + { -0.004696972400, 0.004728140600, -0.003742729000, 0.003624215600, + -0.004833255700, 0.008591834800, -0.020230886000, 0.304036328200, + 0.028369440400, -0.012401276400, 0.006146021300, -0.002197280000, + -0.000674502100, 0.002765145400, -0.002729093500, -0.000726338700}, + { -0.004676431900, 0.004696952300, -0.003706438500, 0.003566616300, + -0.004736314300, 0.008443532900, -0.019947475400, 0.304069875300, + 0.028085420100, -0.012305860200, 0.006110074200, -0.002181080700, + -0.000676615300, 0.002768917300, -0.002737400900, -0.000722876700}, + { -0.004656297500, 0.004673640000, -0.003658751600, 0.003501561200, + -0.004641076300, 0.008287779500, -0.019666250900, 0.304104283300, + 0.027791528800, -0.012202678300, 0.006068547500, -0.002176335900, + -0.000686846600, 0.002761221600, -0.002738242600, -0.000716707400}, + { -0.004627325000, 0.004645205500, -0.003624168800, 0.003445318600, + -0.004545293600, 0.008140437800, -0.019382840700, 
0.304145203000, + 0.027501099300, -0.012109150300, 0.006033055000, -0.002160148400, + -0.000689188800, 0.002765358600, -0.002746934500, -0.000713093800}, + { -0.004606892100, 0.004614320900, -0.003587618200, 0.003379587100, + -0.004450296100, 0.007984496700, -0.019099860600, 0.304176499000, + 0.027209072700, -0.012013907400, 0.005988351400, -0.002145830400, + -0.000690246800, 0.002768039100, -0.002746360600, -0.000707548200}, + { -0.004586402700, 0.004592039500, -0.003542025400, 0.003325426100, + -0.004364128800, 0.007835360000, -0.018813366000, 0.304205403100, + 0.026918136600, -0.011919323000, 0.005951839900, -0.002129046300, + -0.000692658900, 0.002772071700, -0.002754968200, -0.000703804500}, + { -0.004557731900, 0.004563753700, -0.003507314200, 0.003260984500, + -0.004270140400, 0.007680828300, -0.018538518600, 0.304240989200, + 0.026629304300, -0.011817129000, 0.005919133800, -0.002122896500, + -0.000695192900, 0.002766964300, -0.002756676800, -0.000697463600}, + { -0.004538643100, 0.004534730300, -0.003473370900, 0.003206136100, + -0.004175833600, 0.007534408900, -0.018252857000, 0.304277893000, + 0.026331338900, -0.011723107400, 0.005874142700, -0.002107933500, + -0.000697223600, 0.002771073600, -0.002765610600, -0.000693472400}, + { -0.004519537900, 0.004512855100, -0.003427404600, 0.003142614000, + -0.004082281200, 0.007379100700, -0.017967050900, 0.304303942000, + 0.026040879100, -0.011618952200, 0.005839096700, -0.002091778900, + -0.000699143000, 0.002774214900, -0.002765299300, -0.000687773900}, + { -0.004491807700, 0.004485759300, -0.003394540900, 0.003088733700, + -0.003988893700, 0.007233613500, -0.017688142300, 0.304334966200, + 0.025753901000, -0.011523237400, 0.005793241800, -0.002076469800, + -0.000700635700, 0.002769352400, -0.002775689300, -0.000683225000}, + { -0.004473209500, 0.004457105300, -0.003360743400, 0.003025836900, + -0.003897094200, 0.007088896400, -0.017409498600, 0.304365892000, + 0.025458149700, -0.011421394400, 0.005759636500, 
-0.002069178100, + -0.000704737500, 0.002773610300, -0.002775967700, -0.000677353300}, + { -0.004444717000, 0.004428620600, -0.003317850200, 0.002964408100, + -0.003805137400, 0.006934475400, -0.017122233300, 0.304396703900, + 0.025171340700, -0.011324954000, 0.005713078000, -0.002053187900, + -0.000707171500, 0.002777897100, -0.002784864800, -0.000673385900}, + { -0.004427971200, 0.004410227500, -0.003284178700, 0.002910777700, + -0.003712463600, 0.006789868500, -0.016843034600, 0.304434316000, + 0.024877740200, -0.011222628000, 0.005678048700, -0.002036501400, + -0.000709968500, 0.002781813700, -0.002785075900, -0.000667608000}, + { -0.004408351500, 0.004381556100, -0.003250508600, 0.002848323700, + -0.003620768300, 0.006636493100, -0.016563520800, 0.304461685600, + 0.024583535100, -0.011127576100, 0.005631997300, -0.002020750200, + -0.000711766400, 0.002777270600, -0.002795722300, -0.000662778500}, + { -0.004380801100, 0.004354269100, -0.003209814400, 0.002797300300, + -0.003529018000, 0.006491186300, -0.016272419800, 0.304487711300, + 0.024298095100, -0.011022085400, 0.005594924600, -0.002002390300, + -0.000723818600, 0.002779372400, -0.002795073400, -0.000657344200}, + { -0.004364270700, 0.004336078200, -0.003175763900, 0.002735447600, + -0.003438518800, 0.006347319100, -0.015991075000, 0.304521224900, + 0.024006627800, -0.010927513200, 0.005549280000, -0.001994892300, + -0.000728568200, 0.002784856700, -0.002804663400, -0.000653083600}, + { -0.004346993300, 0.004309182300, -0.003144467600, 0.002683321200, + -0.003346361400, 0.006194068900, -0.015710304400, 0.304553789700, + 0.023714956500, -0.010823907600, 0.005513183700, -0.001977598200, + -0.000731700400, 0.002789105500, -0.002805014900, -0.000647228100}, + { -0.004320158000, 0.004282770700, -0.003104379700, 0.002625535200, + -0.003266869300, 0.006049293000, -0.015425894300, 0.304574941900, + 0.023422456600, -0.010718772800, 0.005467201200, -0.001961254600, + -0.000733852000, 0.002784613800, 
-0.002815797600, -0.000642092900}, + { -0.004304236600, 0.004265540900, -0.003072014700, 0.002573489500, + -0.003175720900, 0.005905217400, -0.015142040300, 0.304603952900, + 0.023132882500, -0.010623938900, 0.005429023200, -0.001942767200, + -0.000737686400, 0.002789011800, -0.002816103900, -0.000636395600}, + { -0.004276818000, 0.004239878600, -0.003041041300, 0.002513598800, + -0.003086221600, 0.005753037200, -0.014859275700, 0.304632697600, + 0.022842239000, -0.010518612100, 0.005382887800, -0.001926054500, + -0.000740960500, 0.002794055000, -0.002825655200, -0.000631862500}, + { -0.004260008200, 0.004213047800, -0.003001761400, 0.002464760300, + -0.002997005100, 0.005609909000, -0.014574287100, 0.304659243200, + 0.022552838800, -0.010413292700, 0.005336339900, -0.001909172200, + -0.000744164300, 0.002798242800, -0.002826051900, -0.000625856800}, + { -0.004243542100, 0.004187192800, -0.002971287100, 0.002405494900, + -0.002908733500, 0.005467214000, -0.014288738100, 0.304685251500, + 0.022254934400, -0.010309931700, 0.005299546400, -0.001891056000, + -0.000747532900, 0.002794490100, -0.002837320300, -0.000620567900}, + { -0.004219310800, 0.004172433300, -0.002940735300, 0.002355245100, + -0.002818772700, 0.005315723700, -0.014013384600, 0.304717226400, + 0.021968758500, -0.010213985000, 0.005251545400, -0.001873340800, + -0.000751378700, 0.002799287100, -0.002837812200, -0.000614775300}, + { -0.004202808100, 0.004145912600, -0.002901375100, 0.002298070200, + -0.002731549500, 0.005173599700, -0.013726605300, 0.304740017400, + 0.021680873600, -0.010108070800, 0.005204656900, -0.001863954900, + -0.000765887200, 0.002803718400, -0.002847187000, -0.000610304100}, + { -0.004186959300, 0.004120956700, -0.002872462400, 0.002248900100, + -0.002642744000, 0.005030699900, -0.013438827400, 0.304761759800, + 0.021384603000, -0.010003738700, 0.005166831300, -0.001845151700, + -0.000770115000, 0.002808540700, -0.002847973400, -0.000604129100}, + { -0.004163246800, 
0.004106712900, -0.002842615100, 0.002191690700, + -0.002565148700, 0.004888119300, -0.013159535100, 0.304789228200, + 0.021099553600, -0.009897966800, 0.005119292900, -0.001827130700, + -0.000773711900, 0.002804989100, -0.002859183800, -0.000598855700}, + { -0.004147500100, 0.004081504200, -0.002805006000, 0.002144535400, + -0.002477149500, 0.004737013500, -0.012872591200, 0.304818061900, + 0.020805308800, -0.009792977800, 0.005071968700, -0.001809457700, + -0.000777559700, 0.002809924700, -0.002860138800, -0.000592610900}, + { -0.004123192100, 0.004058501200, -0.002776914700, 0.002087605700, + -0.002391051400, 0.004596104100, -0.012592772900, 0.304843748500, + 0.020512328500, -0.009688643500, 0.005033655500, -0.001789823800, + -0.000782738000, 0.002816043200, -0.002870343700, -0.000587752000}, + { -0.004107894100, 0.004034016500, -0.002748740500, 0.002039760600, + -0.002303557800, 0.004453794800, -0.012301966200, 0.304859324200, + 0.020227247700, -0.009581223300, 0.004984922000, -0.001771208200, + -0.000787003500, 0.002820817600, -0.002871029700, -0.000581575700}, + { -0.004093587400, 0.004018650800, -0.002710135300, 0.001984116400, + -0.002218448800, 0.004313763100, -0.012021363400, 0.304882818800, + 0.019935070200, -0.009475750900, 0.004936537800, -0.001751710900, + -0.000801110200, 0.002825556300, -0.002880712200, -0.000576816700}, + { -0.004070086100, 0.003996648800, -0.002683625100, 0.001937624600, + -0.002132234200, 0.004173020900, -0.011739650400, 0.304913195700, + 0.019645015200, -0.009370571700, 0.004888539700, -0.001733255400, + -0.000805210900, 0.002821862700, -0.002883172300, -0.000569990700}, + { -0.004055549600, 0.003973130000, -0.002656075600, 0.001882018300, + -0.002055659400, 0.004022181900, -0.011457786400, 0.304933847400, + 0.019354083600, -0.009265048200, 0.004849033400, -0.001712856100, + -0.000810671500, 0.002828266400, -0.002893561300, -0.000564955500}, + { -0.004032488300, 0.003959785900, -0.002619434800, 0.001836677600, + -0.001970197900, 
0.003881652600, -0.011174339100, 0.304952581400, + 0.019063780300, -0.009158923400, 0.004800033700, -0.001693616200, + -0.000815677100, 0.002833775600, -0.002894695900, -0.000558583500}, + { -0.004018305800, 0.003936717300, -0.002592340800, 0.001781462400, + -0.001885730400, 0.003741264500, -0.010881885600, 0.304980747200, + 0.018774565100, -0.009052649800, 0.004751033800, -0.001674129700, + -0.000820954500, 0.002840026900, -0.002905324000, -0.000553421000}, + { -0.004004119100, 0.003913332400, -0.002556816600, 0.001736945200, + -0.001801009400, 0.003601134500, -0.010596860500, 0.304996474100, + 0.018485752400, -0.008946113900, 0.004701589700, -0.001654657700, + -0.000825608000, 0.002836444100, -0.002907704700, -0.000546470300}, + { -0.003981307700, 0.003892381100, -0.002531656000, 0.001692059700, + -0.001716342200, 0.003461182700, -0.010312015500, 0.305021093200, + 0.018188790800, -0.008840558000, 0.004652450700, -0.001634950600, + -0.000831250300, 0.002843043800, -0.002918541200, -0.000541059600}, + { -0.003968435100, 0.003879253900, -0.002504378200, 0.001637657900, + -0.001641796500, 0.003321126400, -0.010034991200, 0.305033298300, + 0.017901410200, -0.008734196000, 0.004611510300, -0.001612810100, + -0.000846731600, 0.002848150100, -0.002919339300, -0.000534931600}, + { -0.003945651400, 0.003857769000, -0.002470181000, 0.001594324200, + -0.001558137300, 0.003181615900, -0.009748599800, 0.305054496900, + 0.017615006900, -0.008627194900, 0.004561274000, -0.001592429500, + -0.000852704600, 0.002854820400, -0.002930099600, -0.000529601300}, + { -0.003932401200, 0.003836006300, -0.002444849800, 0.001541005000, + -0.001475450400, 0.003042687600, -0.009461842200, 0.305074913300, + 0.017319625600, -0.008512038500, 0.004512760300, -0.001572829300, + -0.000857749800, 0.002851744600, -0.002933011700, -0.000522254800}, + { -0.003919897700, 0.003823612700, -0.002418783000, 0.001496396500, + -0.001391517800, 0.002903186600, -0.009174192300, 0.305093771700, + 0.017034066400, 
-0.008404468200, 0.004462029400, -0.001552015500, + -0.000863995200, 0.002858545300, -0.002943863500, -0.000516831700}, + { -0.003898001300, 0.003803145000, -0.002385302500, 0.001445432500, + -0.001319345500, 0.002764325800, -0.008895401800, 0.305119193600, + 0.016741989300, -0.008298811000, 0.004411842900, -0.001531072000, + -0.000870352000, 0.002865288500, -0.002945799600, -0.000510187200}, + { -0.003884434400, 0.003781771800, -0.002360832800, 0.001401956000, + -0.001236297400, 0.002625496200, -0.008606633700, 0.305135746300, + 0.016448378400, -0.008191697200, 0.004360886000, -0.001509538700, + -0.000886061100, 0.002871262500, -0.002956450800, -0.000504731900}, + { -0.003862439200, 0.003761723500, -0.002336945300, 0.001350046400, + -0.001154788900, 0.002487086200, -0.008316860100, 0.305150388000, + 0.016155288200, -0.008075516000, 0.004311149200, -0.001488975800, + -0.000891753800, 0.002868512700, -0.002959470700, -0.000497291200}, + { -0.003850712800, 0.003749888800, -0.002302777900, 0.001307766700, + -0.001072739500, 0.002349092100, -0.008036267100, 0.305171969300, + 0.015873669900, -0.007967928800, 0.004259593600, -0.001467267200, + -0.000898797900, 0.002875940200, -0.002970642300, -0.000491739700}, + { -0.003838542900, 0.003729366300, -0.002278849600, 0.001256255000, + -0.001000782800, 0.002210354400, -0.007754203600, 0.305182829200, + 0.015582827100, -0.007861128300, 0.004217388300, -0.001444400500, + -0.000906121000, 0.002883075100, -0.002972661400, -0.000485004200}, + { -0.003816509300, 0.003709864100, -0.002256106400, 0.001214479000, + -0.000919168800, 0.002072061100, -0.007462776200, 0.305202709200, + 0.015292286200, -0.007744430700, 0.004166752200, -0.001423103300, + -0.000912804200, 0.002890024200, -0.002974973900, -0.000478005400}, + { -0.003804721500, 0.003689540000, -0.002223798900, 0.001173609200, + -0.000838292300, 0.001934852000, -0.007180134300, 0.305220409100, + 0.015003290600, -0.007637074700, 0.004114765900, -0.001400660100, + 
-0.000929037200, 0.002887514500, -0.002986971300, -0.000471906600}, + { -0.003784248300, 0.003679910900, -0.002200335400, 0.001122795500, + -0.000758115600, 0.001797689700, -0.006896906600, 0.305236860000, + 0.014714266400, -0.007520350800, 0.004063782200, -0.001378788100, + -0.000936224300, 0.002894831400, -0.002989337500, -0.000464860200}, + { -0.003772222700, 0.003660354300, -0.002177942800, 0.001082013300, + -0.000686657100, 0.001659411900, -0.006612456400, 0.305252141100, + 0.014416934300, -0.007413160700, 0.004011606300, -0.001356286200, + -0.000943766600, 0.002902744400, -0.003001127800, -0.000458704700}, + { -0.003751272400, 0.003641224700, -0.002146282400, 0.001032856600, + -0.000607494800, 0.001522406400, -0.006318495400, 0.305266353700, + 0.014129397200, -0.007305163600, 0.003959036100, -0.001333568200, + -0.000951286800, 0.002910117000, -0.003003554800, -0.000451769900}, + { -0.003740453600, 0.003631277700, -0.002123501700, 0.000991980200, + -0.000527217900, 0.001385321000, -0.006032900700, 0.305278786000, + 0.013842019100, -0.007187486600, 0.003906976500, -0.001311089600, + -0.000958623000, 0.002908760100, -0.003016231600, -0.000445068000}, + { -0.003729050600, 0.003612350900, -0.002101520100, 0.000942775500, + -0.000457538900, 0.001248307000, -0.005747166800, 0.305299153300, + 0.013547492500, -0.007080269600, 0.003854346300, -0.001287607700, + -0.000975892700, 0.002915881100, -0.003018644300, -0.000438154800}, + { -0.003708956400, 0.003594054000, -0.002071214100, 0.000903979800, + -0.000378641900, 0.001112105700, -0.005469551400, 0.305307792800, + 0.013261923500, -0.006962423800, 0.003801673700, -0.001264499500, + -0.000984066100, 0.002924143600, -0.003030515100, -0.000431795400}, + { -0.003698495600, 0.003575779100, -0.002050276300, 0.000864607400, + -0.000299850300, 0.000984943000, -0.005181636100, 0.305325129100, + 0.012968326800, -0.006845375400, 0.003749292200, -0.001241358200, + -0.000992319500, 0.002932209100, -0.003033543100, -0.000424402600}, 
+ { -0.003678810800, 0.003567216600, -0.002028355600, 0.000815922500, + -0.000231056700, 0.000848238100, -0.004893293600, 0.305331925800, + 0.012683638900, -0.006736106600, 0.003694939000, -0.001207958100, + -0.000999989700, 0.002940099300, -0.003045285600, -0.000418160500}, + { -0.003668399300, 0.003548917800, -0.001998535200, 0.000778065700, + -0.000153154100, 0.000712646300, -0.004604878800, 0.305346710600, + 0.012391231100, -0.006618462800, 0.003632680800, -0.001185043200, + -0.001017024400, 0.002938122000, -0.003048768800, -0.000410416700}, + { -0.003658356000, 0.003531138400, -0.001977775600, 0.000729983100, + -0.000075620500, 0.000577290600, -0.004325205000, 0.305359759100, + 0.012099735200, -0.006510233800, 0.003578931400, -0.001160855500, + -0.001026023300, 0.002947019900, -0.003061151700, -0.000403882200}, + { -0.003638938800, 0.003514164000, -0.001958085100, 0.000692151600, + -0.000007068200, 0.000440667500, -0.004034754500, 0.305371538600, + 0.011808326800, -0.006392102100, 0.003525266500, -0.001136761300, + -0.001034693000, 0.002955492900, -0.003064410600, -0.000396332400}, + { -0.003629223000, 0.003505495000, -0.001927947000, 0.000654411200, + 0.000070163400, 0.000314498600, -0.003743576500, 0.305382025700, + 0.011526826700, -0.006273264800, 0.003470974700, -0.001112218000, + -0.001043813600, 0.002964194700, -0.003076842200, -0.000389669200}, + { -0.003610639600, 0.003489057800, -0.001908316200, 0.000607413700, + 0.000146785300, 0.000179544400, -0.003461481100, 0.305390383500, + 0.011237344100, -0.006164483000, 0.003416433800, -0.001087365200, + -0.001053001800, 0.002963790800, -0.003080805300, -0.000381874300}, + { -0.003601269100, 0.003472094500, -0.001888981300, 0.000570197700, + 0.000214496100, 0.000043993500, -0.003178921900, 0.305407159000, + 0.010948594500, -0.006046137200, 0.003362096800, -0.001062345400, + -0.001071720100, 0.002972423700, -0.003093318200, -0.000375146000}, + { -0.003582656500, 0.003455849400, -0.001860858600, 0.000524745900, 
+ 0.000290078700, -0.000090648300, -0.002886387300, 0.305413750100, + 0.010650492300, -0.005928189500, 0.003307938500, -0.001037636700, + -0.001081004600, 0.002981370000, -0.003096989400, -0.000367260100}, + { -0.003573744000, 0.003448505600, -0.001841238600, 0.000487681400, + 0.000357013800, -0.000216601600, -0.002601772700, 0.305427509700, + 0.010362986600, -0.005809448800, 0.003252789700, -0.001002860000, + -0.001090243800, 0.002990478200, -0.003109808600, -0.000360263700}, + { -0.003564774200, 0.003431922800, -0.001822254600, 0.000451135800, + 0.000433003700, -0.000351097200, -0.002316996000, 0.305430485400, + 0.010075840400, -0.005699440100, 0.003187647900, -0.000977716300, + -0.001099777400, 0.002990283900, -0.003113898000, -0.000352248700}, + { -0.003546870600, 0.003416172000, -0.001794658100, 0.000406472900, + 0.000498512700, -0.000485477300, -0.002031776000, 0.305441562500, + 0.009789706100, -0.005580569300, 0.003132334500, -0.000951776200, + -0.001119130300, 0.002999325200, -0.003126683700, -0.000345274700}, + { -0.003538373300, 0.003400302700, -0.001776549100, 0.000370535700, + 0.000573925900, -0.000610465300, -0.001745336100, 0.305451127000, + 0.009494961400, -0.005461903400, 0.003077019500, -0.000925937200, + -0.001129494000, 0.003009011500, -0.003130746200, -0.000337260000}, + { -0.003520921500, 0.003394346200, -0.001758288700, 0.000334721100, + 0.000649268300, -0.000744677800, -0.001458744900, 0.305459744400, + 0.009209557400, -0.005342285700, 0.003021134300, -0.000899901500, + -0.001139831400, 0.003018842400, -0.003144017200, -0.000329970400}, + { -0.003512573100, 0.003378711000, -0.001740435500, 0.000290089600, + 0.000714234800, -0.000869571200, -0.001170623500, 0.305466205000, + 0.008916237200, -0.005223260400, 0.002965177700, -0.000873315500, + -0.001150412500, 0.003019394800, -0.003148759600, -0.000321551300}, + { -0.003495167000, 0.003363569600, -0.001713789300, 0.000255669300, + 0.000788301700, -0.001002806300, -0.000882964600, 0.305471690900, 
+ 0.008632676200, -0.005103588900, 0.002908698000, -0.000837361100, + -0.001169925400, 0.003028499400, -0.003152349500, -0.000313813200}, + { -0.003486840300, 0.003348178400, -0.001696380900, 0.000211810600, + 0.000852586500, -0.001136564500, -0.000594272800, 0.305476029500, + 0.008340196300, -0.004983911700, 0.002842935600, -0.000811096700, + -0.001180375200, 0.003038511100, -0.003165955700, -0.000306075200}, + { -0.003469765900, 0.003343036000, -0.001679160000, 0.000177135000, + 0.000926754800, -0.001260861000, -0.000304469600, 0.305487970600, + 0.008048797600, -0.004864404400, 0.002786310700, -0.000784087900, + -0.001191632000, 0.003048908400, -0.003170531700, -0.000297868500}, + { -0.003461774600, 0.003327682700, -0.001652648300, 0.000143244500, + 0.000990953800, -0.001394377600, -0.000023525100, 0.305488765700, + 0.007767318900, -0.004744017700, 0.002728774500, -0.000756520300, + -0.001202902100, 0.003050026400, -0.003184579100, -0.000289997500}, + { -0.003454137100, 0.003313035200, -0.001636003800, 0.000099881700, + 0.001063982400, -0.001517720800, 0.000267125500, 0.305498083700, + 0.007477414500, -0.004624290900, 0.002671831100, -0.000729004800, + -0.001223831900, 0.003060168000, -0.003188993500, -0.000281862900}, + { -0.003437037600, 0.003298773100, -0.001619778200, 0.000066556300, + 0.001127390700, -0.001650952000, 0.000558741700, 0.305505931400, + 0.007187871700, -0.004503987800, 0.002604895500, -0.000692451500, + -0.001234554400, 0.003070451600, -0.003202910800, -0.000273917600}, + { -0.003429250800, 0.003293434500, -0.001593584100, 0.000033135500, + 0.001200242700, -0.001774239400, 0.000841583200, 0.305502674600, + 0.006898956800, -0.004383683600, 0.002546976200, -0.000664350300, + -0.001246591800, 0.003081283900, -0.003207541700, -0.000265507200}, + { -0.003412490900, 0.003279564200, -0.001577770500, -0.000009313800, + 0.001263167800, -0.001906951100, 0.001124928200, 0.305507531700, + 0.006610834100, -0.004263249000, 0.002489119800, -0.000636159800, + 
-0.001267872500, 0.003091948000, -0.003221504300, -0.000257636700}, + { -0.003404731400, 0.003265175300, -0.001561789400, -0.000042083300, + 0.001335330400, -0.002029953100, 0.001418764600, 0.305510887600, + 0.006323528600, -0.004142487600, 0.002421344900, -0.000607985400, + -0.001279946600, 0.003093528500, -0.003226864100, -0.000248796500}, + { -0.003388377600, 0.003251864100, -0.001537100900, -0.000074381800, + 0.001397822300, -0.002153295800, 0.001704243000, 0.305521780500, + 0.006027997600, -0.004022334800, 0.002362991000, -0.000569919800, + -0.001292085400, 0.003104789400, -0.003241453500, -0.000240501100}, + { -0.003380795000, 0.003247258700, -0.001521319900, -0.000116373400, + 0.001469413800, -0.002285243100, 0.001989240500, 0.305522735200, + 0.005741703600, -0.003901387700, 0.002304368000, -0.000541061900, + -0.001304684500, 0.003116275100, -0.003246569900, -0.000231910300}, + { -0.003373394500, 0.003233233200, -0.001505753800, -0.000148571000, + 0.001531547700, -0.002407952000, 0.002275511400, 0.305521935800, + 0.005456280000, -0.003780111100, 0.002235803700, -0.000512158000, + -0.001326570100, 0.003127233900, -0.003260822600, -0.000223661700}, + { -0.003356689400, 0.003220032300, -0.001491017200, -0.000180271600, + 0.001602837600, -0.002530584900, 0.002572382100, 0.305528831700, + 0.005162307100, -0.003658974500, 0.002176562700, -0.000482746700, + -0.001339690600, 0.003129659400, -0.003266806700, -0.000214552200}, + { -0.003349458000, 0.003206380900, -0.001466550800, -0.000211731900, + 0.001663994200, -0.002662171400, 0.002859642300, 0.305525209100, + 0.004878336100, -0.003537488300, 0.002116848800, -0.000443644700, + -0.001352471400, 0.003141260600, -0.003272003200, -0.000205837900}, + { -0.003332926300, 0.003202823300, -0.001452058900, -0.000252375300, + 0.001725095700, -0.002784621900, 0.003148416400, 0.305529242100, + 0.004585797400, -0.003406922200, 0.002048118400, -0.000414183100, + -0.001365477700, 0.003153111900, -0.003287185700, -0.000196877000}, 
+ { -0.003326162500, 0.003189613400, -0.001437413900, -0.000283721000, + 0.001795892300, -0.002906671400, 0.003427783000, 0.305532062500, + 0.004303401100, -0.003285234700, 0.001988112900, -0.000384077600, + -0.001388720100, 0.003165263500, -0.003292642400, -0.000188174200}, + { -0.003309959200, 0.003176999800, -0.001413956600, -0.000314280200, + 0.001856478200, -0.003038205600, 0.003717851500, 0.305533321300, + 0.004012019600, -0.003163333400, 0.001927602200, -0.000353662500, + -0.001402398400, 0.003168136400, -0.003308169600, -0.000179151700} +}; diff --git a/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.h b/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.h new file mode 100644 index 0000000000000000000000000000000000000000..6eb2287e7930d40674538a55b7720f45d930e886 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/InvertedStationPPFWeights.h @@ -0,0 +1,7 @@ +#if !defined INVERTED_STATION_PPF_WEIGHTS_H +#define INVERTED_STATION_PPF_WEIGHTS_H + +extern int reverseSubbandMapping[512]; +extern const float invertedStationPPFWeights[1024][16] __attribute__ ((aligned(32))); + +#endif diff --git a/RTCP/GPUProc/src/UHEP/Transpose.cl b/RTCP/GPUProc/src/UHEP/Transpose.cl new file mode 100644 index 0000000000000000000000000000000000000000..6d3dae722fe3084a9ea569f775a38691ebbdad94 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Transpose.cl @@ -0,0 +1,43 @@ +typedef __global float2 (*TransposedDataType)[NR_TABS][NR_POLARIZATIONS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1][512]; +typedef __global float4 (*ComplexVoltagesType)[NR_SUBBANDS][NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1][NR_TABS]; + + +__kernel void UHEP_Transpose(__global void *restrict transposedDataPtr, + __global const void *restrict complexVoltagesPtr, + __global int reverseSubbandMapping[512]) +{ + TransposedDataType transposedData = (TransposedDataType) transposedDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float4 tmp[16][17]; + + uint tabBase = 
16 * get_global_id(1); + uint sbBase = 16 * get_global_id(2); + + uint tabOffsetR = get_local_id(0) & 15; + uint tabR = tabBase + tabOffsetR; + uint sbOffsetR = get_local_id(0) >> 4; + int sbSourceR = reverseSubbandMapping[sbBase + sbOffsetR]; + bool doR = (NR_TABS % 16 == 0 || tabR < NR_TABS) && sbSourceR >= 0; + + uint tabOffsetW = get_local_id(0) >> 4; + uint tabW = tabBase + tabOffsetW; + uint sbOffsetW = get_local_id(0) & 15; + int sbSourceW = reverseSubbandMapping[sbBase + sbOffsetW]; + bool doW = NR_TABS % 16 == 0 || tabW < NR_TABS; + + for (int time = 0; time < NR_SAMPLES_PER_SUBBAND + NR_STATION_FILTER_TAPS - 1; time ++) { + if (doR) + tmp[tabOffsetR][sbOffsetR] = (*complexVoltages)[sbSourceR][time][tabR]; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (doW) { + float4 sample = sbSourceW >= 0 ? tmp[tabOffsetW][sbOffsetW] : 0; + (*transposedData)[tabW][0][time][sbBase + sbOffsetW] = sample.xy; + (*transposedData)[tabW][1][time][sbBase + sbOffsetW] = sample.zw; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} diff --git a/RTCP/GPUProc/src/UHEP/Transpose.cl-0.ptx b/RTCP/GPUProc/src/UHEP/Transpose.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..cb8248be325fa81a44e309be3a1a84ebe64e5e75 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Transpose.cl-0.ptx @@ -0,0 +1,142 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Tue Feb 7 07:15:59 2012 (1328595359) +// Driver 295.20 +// + +.version 3.0 +.target sm_21, texmode_independent +.address_size 32 + +.extern .shared .align 16 .b8 shr_1_tmp[4352]; + +.entry UHEP_Transpose( + .param .u32 .ptr .global .align 1 UHEP_Transpose_param_0, + .param .u32 .ptr .global .align 1 UHEP_Transpose_param_1, + .param .u32 .ptr .global .align 4 UHEP_Transpose_param_2 +) +{ + .reg .f32 %f<30>; + .reg .pred %p<7>; + .reg .s32 %r<61>; + + + ld.param.u32 %r25, [UHEP_Transpose_param_0]; + ld.param.u32 %r26, [UHEP_Transpose_param_1]; + ld.param.u32 %r27, [UHEP_Transpose_param_2]; + // inline asm + mov.u32 %r12, 
%envreg4; + // inline asm + // inline asm + mov.u32 %r13, %ntid.y; + // inline asm + // inline asm + mov.u32 %r14, %ctaid.y; + // inline asm + // inline asm + mov.u32 %r15, %tid.y; + // inline asm + add.s32 %r28, %r15, %r12; + mad.lo.s32 %r29, %r14, %r13, %r28; + // inline asm + mov.u32 %r16, %envreg5; + // inline asm + // inline asm + mov.u32 %r17, %ntid.z; + // inline asm + // inline asm + mov.u32 %r18, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r19, %tid.z; + // inline asm + add.s32 %r30, %r19, %r16; + mad.lo.s32 %r31, %r18, %r17, %r30; + // inline asm + mov.u32 %r20, %tid.x; + // inline asm + shl.b32 %r32, %r29, 4; + and.b32 %r33, %r20, 15; + add.s32 %r34, %r32, %r33; + // inline asm + mov.u32 %r21, %tid.x; + // inline asm + shr.u32 %r35, %r21, 4; + shl.b32 %r36, %r31, 4; + add.s32 %r37, %r36, %r35; + shl.b32 %r38, %r37, 2; + add.s32 %r39, %r27, %r38; + setp.lt.u32 %p4, %r34, 4; + ld.global.u32 %r40, [%r39]; + setp.gt.s32 %p5, %r40, -1; + and.pred %p1, %p4, %p5; + // inline asm + mov.u32 %r22, %tid.x; + // inline asm + shr.u32 %r41, %r22, 4; + // inline asm + mov.u32 %r23, %tid.x; + // inline asm + and.b32 %r42, %r23, 15; + add.s32 %r43, %r36, %r42; + shl.b32 %r44, %r43, 2; + add.s32 %r45, %r27, %r44; + add.s32 %r46, %r41, %r32; + setp.lt.u32 %p2, %r46, 4; + mov.u32 %r47, shr_1_tmp; + mad.lo.s32 %r48, %r33, 272, %r47; + and.b32 %r49, %r21, -16; + add.s32 %r1, %r48, %r49; + ld.global.u32 %r50, [%r45]; + setp.gt.s32 %p3, %r50, -1; + mad.lo.s32 %r51, %r41, 272, %r47; + shl.b32 %r52, %r42, 4; + add.s32 %r2, %r51, %r52; + shl.b32 %r53, %r43, 3; + mad.lo.s32 %r54, %r46, 8511488, %r53; + add.s32 %r55, %r54, %r25; + add.s32 %r60, %r55, 4255744; + shl.b32 %r56, %r34, 4; + mad.lo.s32 %r57, %r40, 66496, %r56; + add.s32 %r59, %r26, %r57; + mov.u32 %r58, 1039; + +BB0_1: + add.s32 %r8, %r60, -4255744; + @!%p1 bra BB0_3; + + ld.global.v4.f32 {%f22, %f23, %f24, %f25}, [%r59]; + st.shared.v4.f32 [%r1], {%f22, %f23, %f24, %f25}; + +BB0_3: + bar.sync 0; + @!%p2 bra 
BB0_8; + + @%p3 bra BB0_6; + + mov.f32 %f1, 0f00000000; + mov.f32 %f26, %f1; + mov.f32 %f27, %f1; + mov.f32 %f28, %f1; + mov.f32 %f29, %f1; + bra.uni BB0_7; + +BB0_6: + ld.shared.v4.f32 {%f26, %f27, %f28, %f29}, [%r2]; + +BB0_7: + st.global.v2.f32 [%r8], {%f26, %f27}; + st.global.v2.f32 [%r8+4255744], {%f28, %f29}; + +BB0_8: + bar.sync 0; + add.s32 %r60, %r60, 4096; + add.s32 %r59, %r59, 64; + add.s32 %r58, %r58, -1; + setp.ne.s32 %p6, %r58, 0; + @%p6 bra BB0_1; + + ret; +} + + diff --git a/RTCP/GPUProc/src/UHEP/Transpose.cl.ok b/RTCP/GPUProc/src/UHEP/Transpose.cl.ok new file mode 100644 index 0000000000000000000000000000000000000000..9a0813f5c61f5d8d47bc77f0f80fd56a8ff504fd --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Transpose.cl.ok @@ -0,0 +1,36 @@ +typedef __global float2 (*TransposedDataType)[NR_TABS][NR_POLARIZATIONS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][512]; +typedef __global float2 (*ComplexVoltagesType)[NR_SUBBANDS][NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1][NR_TABS][NR_POLARIZATIONS]; + + +__kernel void UHEP_Transpose(__global void *restrict transposedDataPtr, + __global const void *restrict complexVoltagesPtr, + __global int reverseSubbandMapping[512]) +{ + TransposedDataType transposedData = (TransposedDataType) transposedDataPtr; + ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; + + __local float2 tmp[16][17][2]; + + uint base_tab = 16 * get_group_id(1); + uint base_sb = 16 * get_group_id(2); + uint pol = get_global_id(0); + uint id_1 = get_local_id(1); + uint id_2 = get_local_id(2); + int source_sb_1 = reverseSubbandMapping[base_sb + id_1]; + int source_sb_2 = reverseSubbandMapping[base_sb + id_2]; + + for (int time = 0; time < NR_TIMES_PER_BLOCK + NR_STATION_FILTER_TAPS - 1; time ++) { + if (NR_TABS % 16 == 0 || base_tab + id_1 < NR_TABS) + if (source_sb_2 >= 0) + tmp[id_2][id_1][pol] = (*complexVoltages)[source_sb_2][time][base_tab + id_1][pol]; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (NR_TABS % 16 == 
0 || base_tab + id_2 < NR_TABS) { + float2 sample = source_sb_1 >= 0 ? tmp[id_1][id_2][pol] : 0; + (*transposedData)[base_tab + id_2][pol][time][base_sb + id_1] = sample; + } + + barrier(CLK_LOCAL_MEM_FENCE); + } +} diff --git a/RTCP/GPUProc/src/UHEP/Trigger.cl b/RTCP/GPUProc/src/UHEP/Trigger.cl new file mode 100644 index 0000000000000000000000000000000000000000..4f261307e8416e0aaccaedecd3172c4069839697 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Trigger.cl @@ -0,0 +1,153 @@ +typedef __global struct { + float mean, variance, bestValue; + uint bestApproxIndex; +} (*TriggerInfoType)[NR_TABS]; + +typedef __global float (*InvFIRfilteredDataType)[NR_TABS][NR_POLARIZATIONS][16][16][NR_SAMPLES_PER_SUBBAND / 4][16]; + + +#if 0 +float2 computeThreshold(__global const float *invFIRfilteredDataPtr) +{ + float M = 0, S = 0; + uint count = 0; + + for (uint i = get_local_id(0); i < sizeof(InvFIRfilteredDataType) / sizeof(float); i += get_local_size(0)) { + ++ count; + float sample = invFIRfilteredDataPtr[i]; + float t = sample - M; + M += t / count; + S += t * (sample - M); + } + + barrier(CLK_GLOBAL_MEM_FENCE); + + __local float2 local_MS[256]; + + local_MS[get_local_id(0)] = (float2) (M, S); + + for (uint i = get_local_size(0); (i >>= 1) != 0;) { + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < i) + local_MS[get_local_id(0)] += local_MS[get_local_id(0) + i]; + } + + if (get_local_id(0) == 0) + local_MS[0].y = native_sqrt(local_MS[0].y); + + barrier(CLK_LOCAL_MEM_FENCE); + return local_MS[0]; +} +#endif + + +__kernel void trigger(__global const void *triggerInfoPtr, + __global const float *invFIRfilteredDataPtr) +{ /* For the TAB selected by get_global_id(2): accumulates mean and variance of the power (X*X + Y*Y) and records the largest 11-sample running power sum with its approximate index. The final reduction assumes 16 x 16 work-items in dimensions 0 and 1 - TODO confirm launch configuration. */ + TriggerInfoType triggerInfo = (TriggerInfoType) triggerInfoPtr; + InvFIRfilteredDataType invFIRfilteredData = (InvFIRfilteredDataType) invFIRfilteredDataPtr; + + uint minor = get_local_id(0); + uint major = get_local_id(1); + uint me = 16 * major + minor; + uint tab = get_global_id(2); + + float mean = 0, sumsqdiff = 0; + float count = 0; + + __local union {
+ float f[16][16][16]; + float16 f16[16][16]; + struct { + float means[256], sumsqdiffs[256], values[256]; + uint approxIndices[256]; + } best; + } tmp; + + float16 h0, h1; + h1 /*.s789ABCDEF*/ = 0; + float16 sum_0 = (float16) (0.0f); /* zeroed: sum_0.sF is read in the first iteration before it is ever written */ + float bestValue = 0; + uint bestApproxIndex = 0; + + for (uint time = 0; time < 1024 * NR_SAMPLES_PER_SUBBAND / 4096; time ++) { + for (uint i = 0; i < 16; i ++) { + float sampleX = (*invFIRfilteredData)[tab][0][i][major][time][minor]; + float sampleY = (*invFIRfilteredData)[tab][1][i][major][time][minor]; + float power = sampleX * sampleX + sampleY * sampleY; + tmp.f[i][major][minor] = power; + + count += 1.0f; + float delta = power - mean; + mean += delta / count; + sumsqdiff += delta * (power - mean); + } + + barrier(CLK_LOCAL_MEM_FENCE); + + h0 = tmp.f16[major][minor]; + + sum_0.s0 = sum_0.sF + h0.s0 - h1.s5; + sum_0.s1 = sum_0.s0 + h0.s1 - h1.s6; + sum_0.s2 = sum_0.s1 + h0.s2 - h1.s7; + sum_0.s3 = sum_0.s2 + h0.s3 - h1.s8; + sum_0.s4 = sum_0.s3 + h0.s4 - h1.s9; + sum_0.s5 = sum_0.s4 + h0.s5 - h1.sA; + sum_0.s6 = sum_0.s5 + h0.s6 - h1.sB; + sum_0.s7 = sum_0.s6 + h0.s7 - h1.sC; + sum_0.s8 = sum_0.s7 + h0.s8 - h1.sD; + sum_0.s9 = sum_0.s8 + h0.s9 - h1.sE; + sum_0.sA = sum_0.s9 + h0.sA - h1.sF; + sum_0.sB = sum_0.sA + h0.sB - h0.s0; + sum_0.sC = sum_0.sB + h0.sC - h0.s1; + sum_0.sD = sum_0.sC + h0.sD - h0.s2; + sum_0.sE = sum_0.sD + h0.sE - h0.s3; + sum_0.sF = sum_0.sE + h0.sF - h0.s4; + + float m0 = max(max(sum_0.s0, sum_0.s1), max(sum_0.s2, sum_0.s3)); + float m1 = max(max(sum_0.s4, sum_0.s5), max(sum_0.s6, sum_0.s7)); + float m2 = max(max(sum_0.s8, sum_0.s9), max(sum_0.sA, sum_0.sB)); + float m3 = max(max(sum_0.sC, sum_0.sD), max(sum_0.sE, sum_0.sF)); + float m = max(max(m0, m1), max(m2, m3)); + + if (m >= bestValue) { + bestValue = m; + bestApproxIndex = me * 1024 * NR_SAMPLES_PER_SUBBAND / 256 + time * 16; + } + + h1 /*.s56789ABCDEF*/ = h0 /*.s56789ABCDEF*/; + + barrier(CLK_LOCAL_MEM_FENCE); + } + + tmp.best.means[me] = mean; +
tmp.best.sumsqdiffs[me] = sumsqdiff; + tmp.best.values[me] = bestValue; + tmp.best.approxIndices[me] = bestApproxIndex; + + for (uint i = 256; (i >>= 1) != 0;) { + barrier(CLK_LOCAL_MEM_FENCE); /* publish the stores above (first round) or the previous round's results before slot me + i is read */ + + if (me < i) { + float meanA = tmp.best.means[me], meanB = tmp.best.means[me + i]; + float sumsqdiffA = tmp.best.sumsqdiffs[me], sumsqdiffB = tmp.best.sumsqdiffs[me + i]; + float delta = meanB - meanA; + tmp.best.means[me] = (meanA + meanB) / 2; + tmp.best.sumsqdiffs[me] = sumsqdiffA + sumsqdiffB + delta * delta * count / 2; /* pairwise merge of two partitions holding `count` samples each */ + count *= 2; + + if (tmp.best.values[me] < tmp.best.values[me + i]) { + tmp.best.values[me] = tmp.best.values[me + i]; + tmp.best.approxIndices[me] = tmp.best.approxIndices[me + i]; + } + } + } + + if (me == 0) { /* slot 0 was last written by this same work-item, so no further barrier is needed */ + (*triggerInfo)[tab].mean = tmp.best.means[0]; + (*triggerInfo)[tab].variance = tmp.best.sumsqdiffs[0] / (count - 1); + (*triggerInfo)[tab].bestValue = tmp.best.values[0]; + (*triggerInfo)[tab].bestApproxIndex = tmp.best.approxIndices[0]; + } +} diff --git a/RTCP/GPUProc/src/UHEP/Trigger.cl-0.ptx b/RTCP/GPUProc/src/UHEP/Trigger.cl-0.ptx new file mode 100644 index 0000000000000000000000000000000000000000..ae2713f9037df945f18a988f001c6c0d0abbf29b --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Trigger.cl-0.ptx @@ -0,0 +1,511 @@ +// +// Generated by NVIDIA NVVM Compiler +// Compiler built on Tue Feb 7 07:15:59 2012 (1328595359) +// Driver 295.20 +// + +.version 3.0 +.target sm_21, texmode_independent +.address_size 32 + +.extern .shared .align 64 .b8 shr_4_tmp[16384]; + +.entry trigger( + .param .u32 .ptr .global .align 1 trigger_param_0, + .param .u32 .ptr .global .align 4 trigger_param_1 +) +{ + .reg .f32 %f<434>; + .reg .pred %p<21>; + .reg .s32 %r<71>; + + + ld.param.u32 %r31, [trigger_param_1]; + // inline asm + mov.u32 %r23, %tid.x; + // inline asm + // inline asm + mov.u32 %r24, %tid.y; + // inline asm + shl.b32 %r32, %r24, 4; + // inline asm + mov.u32 %r25, %envreg5; + // inline asm + // inline asm + mov.u32 %r26, %ntid.z; + // inline asm +
// inline asm + mov.u32 %r27, %ctaid.z; + // inline asm + // inline asm + mov.u32 %r28, %tid.z; + // inline asm + add.s32 %r33, %r28, %r25; + mad.lo.s32 %r3, %r27, %r26, %r33; + shl.b32 %r34, %r24, 10; + mov.u32 %r35, shr_4_tmp; + add.s32 %r36, %r35, %r34; + shl.b32 %r37, %r23, 6; + add.s32 %r4, %r36, %r37; + add.s32 %r2, %r32, %r23; + shl.b32 %r38, %r2, 12; + and.b32 %r5, %r38, 16773120; + shl.b32 %r39, %r3, 21; + add.s32 %r40, %r23, %r39; + shl.b32 %r41, %r24, 12; + add.s32 %r42, %r40, %r41; + shl.b32 %r43, %r42, 2; + add.s32 %r70, %r31, %r43; + shl.b32 %r44, %r24, 6; + shl.b32 %r45, %r23, 2; + add.s32 %r46, %r35, %r44; + add.s32 %r47, %r46, %r45; + add.s32 %r7, %r47, 3072; + mov.f32 %f420, 0f00000000; + mov.f32 %f430, %f420; + mov.f32 %f431, %f420; + mov.f32 %f432, %f420; + mov.f32 %f433, %f420; + mov.f32 %f422, %f405; + mov.f32 %f419, %f420; + mov.f32 %f421, %f420; + mov.f32 %f418, %f420; + mov.u32 %r65, 0; + mov.u32 %r64, %r65; + mov.f32 %f426, %f420; + mov.f32 %f427, %f420; + mov.f32 %f428, %f420; + mov.f32 %f429, %f420; + mov.f32 %f423, %f420; + mov.f32 %f424, %f420; + mov.f32 %f425, %f420; + +BB0_1: + mov.u32 %r68, %r70; + mov.u32 %r8, %r68; + shl.b32 %r49, %r64, 4; + add.s32 %r12, %r5, %r49; + mov.u32 %r67, 16; + mov.u32 %r66, %r7; + mov.u32 %r69, %r8; + +BB0_2: + mov.u32 %r15, %r69; + ld.global.f32 %f40, [%r15]; + ld.global.f32 %f41, [%r15+4194304]; + mul.ftz.f32 %f42, %f41, %f41; + fma.rn.ftz.f32 %f43, %f40, %f40, %f42; + st.shared.f32 [%r66+-3072], %f43; + sub.ftz.f32 %f44, %f43, %f419; + add.ftz.f32 %f45, %f421, 0f3F800000; + div.approx.ftz.f32 %f46, %f44, %f45; + add.ftz.f32 %f47, %f419, %f46; + sub.ftz.f32 %f48, %f43, %f47; + fma.rn.ftz.f32 %f49, %f44, %f48, %f420; + ld.global.f32 %f50, [%r15+262144]; + ld.global.f32 %f51, [%r15+4456448]; + mul.ftz.f32 %f52, %f51, %f51; + fma.rn.ftz.f32 %f53, %f50, %f50, %f52; + st.shared.f32 [%r66+-2048], %f53; + add.ftz.f32 %f54, %f45, 0f3F800000; + sub.ftz.f32 %f55, %f53, %f47; + div.approx.ftz.f32 %f56, %f55, 
%f54; + add.ftz.f32 %f57, %f47, %f56; + sub.ftz.f32 %f58, %f53, %f57; + fma.rn.ftz.f32 %f59, %f55, %f58, %f49; + ld.global.f32 %f60, [%r15+524288]; + ld.global.f32 %f61, [%r15+4718592]; + mul.ftz.f32 %f62, %f61, %f61; + fma.rn.ftz.f32 %f63, %f60, %f60, %f62; + st.shared.f32 [%r66+-1024], %f63; + add.ftz.f32 %f64, %f54, 0f3F800000; + sub.ftz.f32 %f65, %f63, %f57; + div.approx.ftz.f32 %f66, %f65, %f64; + add.ftz.f32 %f67, %f57, %f66; + sub.ftz.f32 %f68, %f63, %f67; + fma.rn.ftz.f32 %f69, %f65, %f68, %f59; + ld.global.f32 %f70, [%r15+786432]; + ld.global.f32 %f71, [%r15+4980736]; + mul.ftz.f32 %f72, %f71, %f71; + fma.rn.ftz.f32 %f73, %f70, %f70, %f72; + st.shared.f32 [%r66], %f73; + add.ftz.f32 %f421, %f64, 0f3F800000; + sub.ftz.f32 %f74, %f73, %f67; + div.approx.ftz.f32 %f75, %f74, %f421; + add.ftz.f32 %f419, %f67, %f75; + sub.ftz.f32 %f76, %f73, %f419; + fma.rn.ftz.f32 %f420, %f74, %f76, %f69; + add.s32 %r16, %r15, 1048576; + add.s32 %r66, %r66, 4096; + add.s32 %r67, %r67, -4; + setp.ne.s32 %p1, %r67, 0; + mov.u32 %r69, %r16; + @%p1 bra BB0_2; + + bar.sync 0; + ld.shared.v4.f32 {%f258, %f259, %f260, %f261}, [%r4]; + add.ftz.f32 %f79, %f422, %f258; + sub.ftz.f32 %f81, %f79, %f423; + add.ftz.f32 %f83, %f81, %f259; + sub.ftz.f32 %f85, %f83, %f424; + add.ftz.f32 %f87, %f85, %f260; + sub.ftz.f32 %f89, %f87, %f425; + add.ftz.f32 %f91, %f89, %f261; + sub.ftz.f32 %f93, %f91, %f426; + ld.shared.v4.f32 {%f274, %f423, %f424, %f425}, [%r4+16]; + add.ftz.f32 %f95, %f93, %f274; + sub.ftz.f32 %f97, %f95, %f427; + add.ftz.f32 %f99, %f97, %f423; + sub.ftz.f32 %f101, %f99, %f428; + add.ftz.f32 %f103, %f101, %f424; + sub.ftz.f32 %f105, %f103, %f429; + add.ftz.f32 %f107, %f105, %f425; + sub.ftz.f32 %f109, %f107, %f430; + ld.shared.v4.f32 {%f426, %f427, %f428, %f429}, [%r4+32]; + add.ftz.f32 %f111, %f109, %f426; + sub.ftz.f32 %f113, %f111, %f431; + add.ftz.f32 %f115, %f113, %f427; + sub.ftz.f32 %f117, %f115, %f432; + add.ftz.f32 %f119, %f117, %f428; + sub.ftz.f32 %f121, %f119, %f433; + 
add.ftz.f32 %f123, %f121, %f429; + sub.ftz.f32 %f124, %f123, %f258; + ld.shared.v4.f32 {%f430, %f431, %f432, %f433}, [%r4+48]; + add.ftz.f32 %f126, %f124, %f430; + sub.ftz.f32 %f127, %f126, %f259; + add.ftz.f32 %f129, %f127, %f431; + sub.ftz.f32 %f130, %f129, %f260; + add.ftz.f32 %f132, %f130, %f432; + sub.ftz.f32 %f133, %f132, %f261; + add.ftz.f32 %f135, %f133, %f433; + sub.ftz.f32 %f136, %f135, %f274; + max.f32 %f137, %f81, %f85; + max.f32 %f138, %f89, %f93; + max.f32 %f139, %f137, %f138; + max.f32 %f140, %f97, %f101; + max.f32 %f141, %f105, %f109; + max.f32 %f142, %f140, %f141; + max.f32 %f143, %f113, %f117; + max.f32 %f144, %f121, %f124; + max.f32 %f145, %f143, %f144; + max.f32 %f146, %f127, %f130; + max.f32 %f147, %f133, %f136; + max.f32 %f148, %f146, %f147; + max.f32 %f149, %f139, %f142; + max.f32 %f150, %f145, %f148; + max.f32 %f151, %f149, %f150; + setp.ltu.ftz.f32 %p2, %f151, %f418; + selp.f32 %f418, %f418, %f151, %p2; + selp.b32 %r65, %r65, %r12, %p2; + mov.f32 %f306, %f81; + mov.f32 %f307, %f85; + mov.f32 %f308, %f89; + mov.f32 %f309, %f93; + mov.f32 %f326, %f97; + mov.f32 %f327, %f101; + mov.f32 %f328, %f105; + mov.f32 %f329, %f109; + mov.f32 %f346, %f113; + mov.f32 %f347, %f117; + mov.f32 %f348, %f121; + mov.f32 %f349, %f124; + mov.f32 %f362, %f127; + mov.f32 %f363, %f130; + mov.f32 %f364, %f133; + mov.f32 %f422, %f136; + bar.sync 0; + add.s32 %r64, %r64, 1; + add.s32 %r21, %r8, 64; + setp.ne.s32 %p3, %r64, 256; + mov.u32 %r70, %r21; + @%p3 bra BB0_1; + + shl.b32 %r50, %r2, 2; + add.s32 %r22, %r35, %r50; + st.shared.f32 [%r22], %f419; + st.shared.f32 [%r22+1024], %f420; + st.shared.f32 [%r22+2048], %f418; + st.shared.u32 [%r22+3072], %r65; + setp.lt.u32 %p4, %r2, 128; + @%p4 bra BB0_5; + bra.uni BB0_7; + +BB0_5: + ld.shared.f32 %f152, [%r22+1536]; + ld.shared.f32 %f153, [%r22+512]; + sub.ftz.f32 %f154, %f153, %f419; + add.ftz.f32 %f155, %f419, %f153; + div.rn.ftz.f32 %f156, %f155, 0f40000000; + st.shared.f32 [%r22], %f156; + add.ftz.f32 %f157, %f420, 
%f152; + mul.ftz.f32 %f158, %f154, %f154; + mul.ftz.f32 %f159, %f158, %f421; + div.rn.ftz.f32 %f160, %f159, 0f40000000; + add.ftz.f32 %f161, %f157, %f160; + st.shared.f32 [%r22+1024], %f161; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f13, [%r22+2560]; + setp.lt.ftz.f32 %p5, %f418, %f13; + @%p5 bra BB0_6; + bra.uni BB0_7; + +BB0_6: + st.shared.f32 [%r22+2048], %f13; + ld.shared.u32 %r52, [%r22+3584]; + st.shared.u32 [%r22+3072], %r52; + +BB0_7: + bar.sync 0; + setp.lt.u32 %p6, %r2, 64; + @%p6 bra BB0_8; + bra.uni BB0_10; + +BB0_8: + ld.shared.f32 %f162, [%r22]; + ld.shared.f32 %f163, [%r22+1024]; + ld.shared.f32 %f164, [%r22+1280]; + ld.shared.f32 %f165, [%r22+256]; + sub.ftz.f32 %f166, %f165, %f162; + add.ftz.f32 %f167, %f162, %f165; + div.rn.ftz.f32 %f168, %f167, 0f40000000; + st.shared.f32 [%r22], %f168; + add.ftz.f32 %f169, %f163, %f164; + mul.ftz.f32 %f170, %f166, %f166; + mul.ftz.f32 %f171, %f170, %f421; + div.rn.ftz.f32 %f172, %f171, 0f40000000; + add.ftz.f32 %f173, %f169, %f172; + st.shared.f32 [%r22+1024], %f173; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f16, [%r22+2304]; + ld.shared.f32 %f174, [%r22+2048]; + setp.lt.ftz.f32 %p7, %f174, %f16; + @%p7 bra BB0_9; + bra.uni BB0_10; + +BB0_9: + st.shared.f32 [%r22+2048], %f16; + ld.shared.u32 %r53, [%r22+3328]; + st.shared.u32 [%r22+3072], %r53; + +BB0_10: + bar.sync 0; + setp.lt.u32 %p8, %r2, 32; + @%p8 bra BB0_11; + bra.uni BB0_13; + +BB0_11: + ld.shared.f32 %f175, [%r22]; + ld.shared.f32 %f176, [%r22+1024]; + ld.shared.f32 %f177, [%r22+1152]; + ld.shared.f32 %f178, [%r22+128]; + sub.ftz.f32 %f179, %f178, %f175; + add.ftz.f32 %f180, %f175, %f178; + div.rn.ftz.f32 %f181, %f180, 0f40000000; + st.shared.f32 [%r22], %f181; + add.ftz.f32 %f182, %f176, %f177; + mul.ftz.f32 %f183, %f179, %f179; + mul.ftz.f32 %f184, %f183, %f421; + div.rn.ftz.f32 %f185, %f184, 0f40000000; + add.ftz.f32 %f186, %f182, %f185; + st.shared.f32 [%r22+1024], %f186; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f19, 
[%r22+2176]; + ld.shared.f32 %f187, [%r22+2048]; + setp.lt.ftz.f32 %p9, %f187, %f19; + @%p9 bra BB0_12; + bra.uni BB0_13; + +BB0_12: + st.shared.f32 [%r22+2048], %f19; + ld.shared.u32 %r54, [%r22+3200]; + st.shared.u32 [%r22+3072], %r54; + +BB0_13: + bar.sync 0; + setp.lt.u32 %p10, %r2, 16; + @%p10 bra BB0_14; + bra.uni BB0_16; + +BB0_14: + ld.shared.f32 %f188, [%r22]; + ld.shared.f32 %f189, [%r22+1024]; + ld.shared.f32 %f190, [%r22+1088]; + ld.shared.f32 %f191, [%r22+64]; + sub.ftz.f32 %f192, %f191, %f188; + add.ftz.f32 %f193, %f188, %f191; + div.rn.ftz.f32 %f194, %f193, 0f40000000; + st.shared.f32 [%r22], %f194; + add.ftz.f32 %f195, %f189, %f190; + mul.ftz.f32 %f196, %f192, %f192; + mul.ftz.f32 %f197, %f196, %f421; + div.rn.ftz.f32 %f198, %f197, 0f40000000; + add.ftz.f32 %f199, %f195, %f198; + st.shared.f32 [%r22+1024], %f199; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f22, [%r22+2112]; + ld.shared.f32 %f200, [%r22+2048]; + setp.lt.ftz.f32 %p11, %f200, %f22; + @%p11 bra BB0_15; + bra.uni BB0_16; + +BB0_15: + st.shared.f32 [%r22+2048], %f22; + ld.shared.u32 %r55, [%r22+3136]; + st.shared.u32 [%r22+3072], %r55; + +BB0_16: + bar.sync 0; + setp.lt.u32 %p12, %r2, 8; + @%p12 bra BB0_17; + bra.uni BB0_19; + +BB0_17: + ld.shared.f32 %f201, [%r22]; + ld.shared.f32 %f202, [%r22+1024]; + ld.shared.f32 %f203, [%r22+1056]; + ld.shared.f32 %f204, [%r22+32]; + sub.ftz.f32 %f205, %f204, %f201; + add.ftz.f32 %f206, %f201, %f204; + div.rn.ftz.f32 %f207, %f206, 0f40000000; + st.shared.f32 [%r22], %f207; + add.ftz.f32 %f208, %f202, %f203; + mul.ftz.f32 %f209, %f205, %f205; + mul.ftz.f32 %f210, %f209, %f421; + div.rn.ftz.f32 %f211, %f210, 0f40000000; + add.ftz.f32 %f212, %f208, %f211; + st.shared.f32 [%r22+1024], %f212; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f25, [%r22+2080]; + ld.shared.f32 %f213, [%r22+2048]; + setp.lt.ftz.f32 %p13, %f213, %f25; + @%p13 bra BB0_18; + bra.uni BB0_19; + +BB0_18: + st.shared.f32 [%r22+2048], %f25; + ld.shared.u32 %r56, 
[%r22+3104]; + st.shared.u32 [%r22+3072], %r56; + +BB0_19: + bar.sync 0; + setp.lt.u32 %p14, %r2, 4; + @%p14 bra BB0_20; + bra.uni BB0_22; + +BB0_20: + ld.shared.f32 %f214, [%r22]; + ld.shared.f32 %f215, [%r22+1024]; + ld.shared.f32 %f216, [%r22+1040]; + ld.shared.f32 %f217, [%r22+16]; + sub.ftz.f32 %f218, %f217, %f214; + add.ftz.f32 %f219, %f214, %f217; + div.rn.ftz.f32 %f220, %f219, 0f40000000; + st.shared.f32 [%r22], %f220; + add.ftz.f32 %f221, %f215, %f216; + mul.ftz.f32 %f222, %f218, %f218; + mul.ftz.f32 %f223, %f222, %f421; + div.rn.ftz.f32 %f224, %f223, 0f40000000; + add.ftz.f32 %f225, %f221, %f224; + st.shared.f32 [%r22+1024], %f225; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f28, [%r22+2064]; + ld.shared.f32 %f226, [%r22+2048]; + setp.lt.ftz.f32 %p15, %f226, %f28; + @%p15 bra BB0_21; + bra.uni BB0_22; + +BB0_21: + st.shared.f32 [%r22+2048], %f28; + ld.shared.u32 %r57, [%r22+3088]; + st.shared.u32 [%r22+3072], %r57; + +BB0_22: + bar.sync 0; + setp.lt.u32 %p16, %r2, 2; + @%p16 bra BB0_23; + bra.uni BB0_25; + +BB0_23: + ld.shared.f32 %f227, [%r22]; + ld.shared.f32 %f228, [%r22+1024]; + ld.shared.f32 %f229, [%r22+1032]; + ld.shared.f32 %f230, [%r22+8]; + sub.ftz.f32 %f231, %f230, %f227; + add.ftz.f32 %f232, %f227, %f230; + div.rn.ftz.f32 %f233, %f232, 0f40000000; + st.shared.f32 [%r22], %f233; + add.ftz.f32 %f234, %f228, %f229; + mul.ftz.f32 %f235, %f231, %f231; + mul.ftz.f32 %f236, %f235, %f421; + div.rn.ftz.f32 %f237, %f236, 0f40000000; + add.ftz.f32 %f238, %f234, %f237; + st.shared.f32 [%r22+1024], %f238; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f31, [%r22+2056]; + ld.shared.f32 %f239, [%r22+2048]; + setp.lt.ftz.f32 %p17, %f239, %f31; + @%p17 bra BB0_24; + bra.uni BB0_25; + +BB0_24: + st.shared.f32 [%r22+2048], %f31; + ld.shared.u32 %r58, [%r22+3080]; + st.shared.u32 [%r22+3072], %r58; + +BB0_25: + bar.sync 0; + setp.eq.s32 %p18, %r2, 0; + @%p18 bra BB0_26; + bra.uni BB0_28; + +BB0_26: + ld.shared.f32 %f240, [%r22]; + ld.shared.f32 
%f241, [%r22+1024]; + ld.shared.f32 %f242, [%r22+1028]; + ld.shared.f32 %f243, [%r22+4]; + sub.ftz.f32 %f244, %f243, %f240; + add.ftz.f32 %f245, %f240, %f243; + div.rn.ftz.f32 %f246, %f245, 0f40000000; + st.shared.f32 [%r22], %f246; + add.ftz.f32 %f247, %f241, %f242; + mul.ftz.f32 %f248, %f244, %f244; + mul.ftz.f32 %f249, %f248, %f421; + div.rn.ftz.f32 %f250, %f249, 0f40000000; + add.ftz.f32 %f251, %f247, %f250; + st.shared.f32 [%r22+1024], %f251; + add.ftz.f32 %f421, %f421, %f421; + ld.shared.f32 %f34, [%r22+2052]; + ld.shared.f32 %f252, [%r22+2048]; + setp.lt.ftz.f32 %p19, %f252, %f34; + @%p19 bra BB0_27; + bra.uni BB0_28; + +BB0_27: + st.shared.f32 [%r22+2048], %f34; + ld.shared.u32 %r59, [%r22+3076]; + st.shared.u32 [%r22+3072], %r59; + +BB0_28: + bar.sync 0; + @%p18 bra BB0_30; + + ret; + +BB0_30: + shl.b32 %r60, %r3, 4; + ld.param.u32 %r63, [trigger_param_0]; + add.s32 %r61, %r63, %r60; + ld.shared.f32 %f253, [shr_4_tmp]; + st.global.f32 [%r61], %f253; + add.ftz.f32 %f254, %f421, 0fBF800000; + ld.shared.f32 %f255, [shr_4_tmp+1024]; + div.approx.ftz.f32 %f256, %f255, %f254; + st.global.f32 [%r61+4], %f256; + ld.shared.f32 %f257, [shr_4_tmp+2048]; + st.global.f32 [%r61+8], %f257; + ld.shared.u32 %r62, [shr_4_tmp+3072]; + st.global.u32 [%r61+12], %r62; + ret; +} + + diff --git a/RTCP/GPUProc/src/UHEP/Trigger.cl.8 b/RTCP/GPUProc/src/UHEP/Trigger.cl.8 new file mode 100644 index 0000000000000000000000000000000000000000..c8215451c60c437dd359baa29d00cea9ebb0ad21 --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Trigger.cl.8 @@ -0,0 +1,125 @@ +typedef __global struct { + float bestValue; + uint bestApproxIndex; +} (*TriggerInfoType)[NR_TABS]; + +typedef __global float (*InvFIRfilteredDataType)[NR_TABS][NR_POLARIZATIONS][8][32][NR_TIMES_PER_BLOCK / 2][8]; + + +float2 computeThreshold(__global const float *invFIRfilteredDataPtr) +{ + float M = 0, S = 0; + uint count = 0; + + for (uint i = get_local_id(0); i < sizeof(InvFIRfilteredDataType) / sizeof(float); i += 
get_local_size(0)) { + ++ count; + float sample = invFIRfilteredDataPtr[i]; + float t = sample - M; + M += t / count; + S += t * (sample - M); + } + + barrier(CLK_GLOBAL_MEM_FENCE); + + __local float2 local_MS[256]; + + local_MS[get_local_id(0)] = (float2) (M, S); + + for (uint i = get_local_size(0); (i >>= 1) != 0;) { + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < i) + local_MS[get_local_id(0)] += local_MS[get_local_id(0) + i]; + } + + if (get_local_id(0) == 0) + local_MS[0].y = native_sqrt(local_MS[0].y); + + barrier(CLK_LOCAL_MEM_FENCE); + return local_MS[0]; +} + + +__kernel void trigger(__global const void *triggerInfoPtr, + __global const float *invFIRfilteredDataPtr) +{ + TriggerInfoType triggerInfo = (TriggerInfoType) triggerInfoPtr; + InvFIRfilteredDataType invFIRfilteredData = (InvFIRfilteredDataType) invFIRfilteredDataPtr; + + uint minor = get_local_id(0); + uint major = get_local_id(1); + uint me = 8 * major + minor; + uint tab = get_global_id(2); + + __local union { + float f[8][32][8]; + float8 f8[32][8]; + struct { + float values[256]; + uint approxIndices[256]; + } best; + } tmp; + + float8 h0, h1, h2; + h1 = 0; + h2.s567 = 0; + float8 sum_0; + float bestValue = 0; + uint bestApproxIndex = 0; + + for (uint time = 0; time < 1024 * NR_TIMES_PER_BLOCK / 2048; time ++) { + for (uint i = 0; i < 8; i ++) { + float sampleX = (*invFIRfilteredData)[tab][0][i][major][time][minor]; + float sampleY = (*invFIRfilteredData)[tab][1][i][major][time][minor]; + float power = sampleX * sampleX + sampleY * sampleY; + tmp.f[i][major][minor] = power; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + h0 = tmp.f8[major][minor]; + + sum_0.s0 = sum_0.s7 + h0.s0 - h2.s5; + sum_0.s1 = sum_0.s0 + h0.s1 - h2.s6; + sum_0.s2 = sum_0.s1 + h0.s2 - h2.s7; + sum_0.s3 = sum_0.s2 + h0.s3 - h1.s0; + sum_0.s4 = sum_0.s3 + h0.s4 - h1.s1; + sum_0.s5 = sum_0.s4 + h0.s5 - h1.s2; + sum_0.s6 = sum_0.s5 + h0.s6 - h1.s3; + sum_0.s7 = sum_0.s6 + h0.s7 - h1.s4; + + float m0 = max(max(sum_0.s0, 
sum_0.s1), max(sum_0.s2, sum_0.s3)); + float m1 = max(max(sum_0.s4, sum_0.s5), max(sum_0.s6, sum_0.s7)); + float m = max(m0, m1); + + if (m >= bestValue) { + bestValue = m; + bestApproxIndex = me * 1024 * NR_TIMES_PER_BLOCK / 256 + time * 8; + } + + h2.s567 = h1.s567; + h1 = h0; + + barrier(CLK_LOCAL_MEM_FENCE); + } + + + tmp.best.values[me] = bestValue; + tmp.best.approxIndices[me] = bestApproxIndex; + + for (uint i = 256; (i >>= 1) != 0;) { + if (me < i) { + if (tmp.best.values[me] < tmp.best.values[me + i]) { + tmp.best.values[me] = tmp.best.values[me + i]; + tmp.best.approxIndices[me] = tmp.best.approxIndices[me + i]; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (me == 0) { + (*triggerInfo)[tab].bestValue = tmp.best.values[0]; + (*triggerInfo)[tab].bestApproxIndex = tmp.best.approxIndices[0]; + } +} diff --git a/RTCP/GPUProc/src/UHEP/Trigger.cl.ok b/RTCP/GPUProc/src/UHEP/Trigger.cl.ok new file mode 100644 index 0000000000000000000000000000000000000000..dc6d7a3aff7c6a437edb0fb743e1d53f5f95bdce --- /dev/null +++ b/RTCP/GPUProc/src/UHEP/Trigger.cl.ok @@ -0,0 +1,133 @@ +typedef __global struct { + float bestValue; + uint bestApproxIndex; +} (*TriggerInfoType)[NR_TABS]; + +typedef __global float (*InvFIRfilteredDataType)[NR_TABS][NR_POLARIZATIONS][16][16][NR_TIMES_PER_BLOCK / 4][16]; + + +float2 computeThreshold(__global const float *invFIRfilteredDataPtr) +{ + float M = 0, S = 0; + uint count = 0; + + for (uint i = get_local_id(0); i < sizeof(InvFIRfilteredDataType) / sizeof(float); i += get_local_size(0)) { + ++ count; + float sample = invFIRfilteredDataPtr[i]; + float t = sample - M; + M += t / count; + S += t * (sample - M); + } + + barrier(CLK_GLOBAL_MEM_FENCE); + + __local float2 local_MS[256]; + + local_MS[get_local_id(0)] = (float2) (M, S); + + for (uint i = get_local_size(0); (i >>= 1) != 0;) { + barrier(CLK_LOCAL_MEM_FENCE); + + if (get_local_id(0) < i) + local_MS[get_local_id(0)] += local_MS[get_local_id(0) + i]; + } + + if (get_local_id(0) == 0) 
+ local_MS[0].y = native_sqrt(local_MS[0].y); + + barrier(CLK_LOCAL_MEM_FENCE); + return local_MS[0]; +} + + +__kernel void trigger(__global const void *triggerInfoPtr, + __global const float *invFIRfilteredDataPtr) +{ + TriggerInfoType triggerInfo = (TriggerInfoType) triggerInfoPtr; + InvFIRfilteredDataType invFIRfilteredData = (InvFIRfilteredDataType) invFIRfilteredDataPtr; + + uint minor = get_local_id(0); + uint major = get_local_id(1); + uint me = 16 * major + minor; + uint tab = get_global_id(2); + + __local union { + float f[16][16][16]; + float16 f16[16][16]; + struct { + float values[256]; + uint approxIndices[256]; + } best; + } tmp; + + float16 h0, h1; + h1.s789ABCDEF = 0; + float16 sum_0, sum_1; + float bestValue = 0; + uint bestApproxIndex = 0; + + for (uint time = 0; time < 1024 * NR_TIMES_PER_BLOCK / 4096; time ++) { + for (uint i = 0; i < 16; i ++) { + float sampleX = (*invFIRfilteredData)[tab][0][i][major][time][minor]; + float sampleY = (*invFIRfilteredData)[tab][1][i][major][time][minor]; + float power = sampleX * sampleX + sampleY * sampleY; + tmp.f[i][major][minor] = power; + } + + barrier(CLK_LOCAL_MEM_FENCE); + + h0 = tmp.f16[major][minor]; + + sum_0.s0 = sum_0.sF + h0.s0 - h1.s5; + sum_0.s1 = sum_0.s0 + h0.s1 - h1.s6; + sum_0.s2 = sum_0.s1 + h0.s2 - h1.s7; + sum_0.s3 = sum_0.s2 + h0.s3 - h1.s8; + sum_0.s4 = sum_0.s3 + h0.s4 - h1.s9; + sum_0.s5 = sum_0.s4 + h0.s5 - h1.sA; + sum_0.s6 = sum_0.s5 + h0.s6 - h1.sB; + sum_0.s7 = sum_0.s6 + h0.s7 - h1.sC; + sum_0.s8 = sum_0.s7 + h0.s8 - h1.sD; + sum_0.s9 = sum_0.s8 + h0.s9 - h1.sE; + sum_0.sA = sum_0.s9 + h0.sA - h1.sF; + sum_0.sB = sum_0.sA + h0.sB - h0.s0; + sum_0.sC = sum_0.sB + h0.sC - h0.s1; + sum_0.sD = sum_0.sC + h0.sD - h0.s2; + sum_0.sE = sum_0.sD + h0.sE - h0.s3; + sum_0.sF = sum_0.sE + h0.sF - h0.s4; + + float m0 = max(max(sum_0.s0, sum_0.s1), max(sum_0.s2, sum_0.s3)); + float m1 = max(max(sum_0.s4, sum_0.s5), max(sum_0.s6, sum_0.s7)); + float m2 = max(max(sum_0.s8, sum_0.s9), 
max(sum_0.sA, sum_0.sB)); + float m3 = max(max(sum_0.sC, sum_0.sD), max(sum_0.sE, sum_0.sF)); + float m = max(max(m0, m1), max(m2, m3)); + + if (m >= bestValue) { + bestValue = m; + bestApproxIndex = me * 1024 * NR_TIMES_PER_BLOCK / 256 + time * 16; + } + + h1.s56789ABCDEF = h0.s56789ABCDEF; + + barrier(CLK_LOCAL_MEM_FENCE); + } + + + tmp.best.values[me] = bestValue; + tmp.best.approxIndices[me] = bestApproxIndex; + + for (uint i = 256; (i >>= 1) != 0;) { + if (me < i) { + if (tmp.best.values[me] < tmp.best.values[me + i]) { + tmp.best.values[me] = tmp.best.values[me + i]; + tmp.best.approxIndices[me] = tmp.best.approxIndices[me + i]; + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + } + + if (me == 0) { + (*triggerInfo)[tab].bestValue = tmp.best.values[0]; + (*triggerInfo)[tab].bestApproxIndex = tmp.best.approxIndices[0]; + } +} diff --git a/RTCP/GPUProc/src/WallClockTime.h b/RTCP/GPUProc/src/WallClockTime.h new file mode 100644 index 0000000000000000000000000000000000000000..352218f342f7c3c49d4b86c3e244fde4984b4ae0 --- /dev/null +++ b/RTCP/GPUProc/src/WallClockTime.h @@ -0,0 +1,108 @@ +//# Copyright (C) 2007 +//# ASTRON (Netherlands Foundation for Research in Astronomy) +//# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl +//# +//# This program is free software; you can redistribute it and/or modify +//# it under the terms of the GNU General Public License as published by +//# the Free Software Foundation; either version 2 of the License, or +//# (at your option) any later version. +//# +//# This program is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. 
+//# +//# You should have received a copy of the GNU General Public License +//# along with this program; if not, write to the Free Software +//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +//# +//# $Id: WallClockTime.h 17975 2011-05-10 09:52:51Z mol $ + +#ifndef LOFAR_GPUPROC_WALL_CLOCK_TIME_H +#define LOFAR_GPUPROC_WALL_CLOCK_TIME_H + +//# Never #include <config.h> or #include <lofar_config.h> in a header file! + +#include <Interface/RSPTimeStamp.h> +#include <Common/Thread/Condition.h> +#include <Common/Thread/Mutex.h> + +#include <errno.h> +#include <time.h> + + +namespace LOFAR { +namespace RTCP { + + +class WallClockTime // cancellable waits built on a mutex/condition pair +{ + public: + WallClockTime(); + + bool waitUntil(const struct timespec &); // all waitUntil() overloads return false once cancelWait() has been called, true otherwise + bool waitUntil(time_t); + bool waitUntil(const TimeStamp &); + void waitForever(); // returns only after cancelWait() has been called + + void cancelWait(); + + private: + Mutex itsMutex; + Condition itsCondition; + bool itsCancelled; // guarded by itsMutex; set by cancelWait() and never reset in this header +}; + + +inline WallClockTime::WallClockTime() +: + itsCancelled(false) +{ +} + + +inline bool WallClockTime::waitUntil(const struct timespec &timespec) +{ + ScopedLock scopedLock(itsMutex); + + while (!itsCancelled && itsCondition.wait(itsMutex, timespec)) // NOTE(review): presumably wait() returns false once the deadline has passed - confirm in Condition.h + ; + + return !itsCancelled; +} + + +inline bool WallClockTime::waitUntil(time_t timestamp) +{ + struct timespec timespec = { timestamp, 0 }; + + return waitUntil(timespec); +} + + +inline bool WallClockTime::waitUntil(const TimeStamp &timestamp) +{ + return waitUntil(static_cast<struct timespec>(timestamp)); +} + +inline void WallClockTime::waitForever() +{ + ScopedLock scopedLock(itsMutex); + + while (!itsCancelled) + itsCondition.wait(itsMutex); +} + +inline void WallClockTime::cancelWait() +{ + ScopedLock scopedLock(itsMutex); + + itsCancelled = true; // set under the lock so a waiter re-checking the flag cannot miss it + itsCondition.signal(); +} + + +} // namespace RTCP +} // namespace LOFAR + +#endif diff --git a/RTCP/GPUProc/src/fft.cl b/RTCP/GPUProc/src/fft.cl new file mode 100644 index 0000000000000000000000000000000000000000..759666803e134cd04bc0f3e7edcdc8beff6a8986 ---
/dev/null +++ b/RTCP/GPUProc/src/fft.cl @@ -0,0 +1,356 @@ + + +// Copyright (C) 2010-2012 Advanced Micro Devices, Inc. All Rights Reserved. + + +__constant float2 twiddles[7] = { +(float2)(1.0000000000000000000000000000000000f, -0.0000000000000000000000000000000000f), +(float2)(1.0000000000000000000000000000000000f, -0.0000000000000000000000000000000000f), +(float2)(1.0000000000000000000000000000000000f, -0.0000000000000000000000000000000000f), +(float2)(1.0000000000000000000000000000000000f, -0.0000000000000000000000000000000000f), +(float2)(0.7071067811865475727373109293694142f, -0.7071067811865474617150084668537602f), +(float2)(0.0000000000000000612323399573676604f, -1.0000000000000000000000000000000000f), +(float2)(-0.7071067811865474617150084668537602f, -0.7071067811865475727373109293694142f), +}; + + +#define fvect2 float2 + +#define C8Q 0.70710678118654752440084436210485f +#define C5QA 0.30901699437494742410229341718282f +#define C5QB 0.95105651629515357211643933337938f +#define C5QC 0.50000000000000000000000000000000f +#define C5QD 0.58778525229247312916870595463907f +#define C5QE 0.80901699437494742410229341718282f +#define C3QA 0.50000000000000000000000000000000f +#define C3QB 0.86602540378443864676372317075294f + +__attribute__((always_inline)) void +FwdRad2B1(float2 *R0, float2 *R1) +{ + + float2 T; + + (*R1) = (*R0) - (*R1); + (*R0) = 2.0f * (*R0) - (*R1); + + +} + +__attribute__((always_inline)) void +InvRad2B1(float2 *R0, float2 *R1) +{ + + float2 T; + + (*R1) = (*R0) - (*R1); + (*R0) = 2.0f * (*R0) - (*R1); + + +} + +__attribute__((always_inline)) void +FwdRad4B1(float2 *R0, float2 *R2, float2 *R1, float2 *R3) +{ + + float2 T; + + (*R1) = (*R0) - (*R1); + (*R0) = 2.0f * (*R0) - (*R1); + (*R3) = (*R2) - (*R3); + (*R2) = 2.0f * (*R2) - (*R3); + + (*R2) = (*R0) - (*R2); + (*R0) = 2.0f * (*R0) - (*R2); + (*R3) = (*R1) + (fvect2)(-(*R3).y, (*R3).x); + (*R1) = 2.0f * (*R1) - (*R3); + + T = (*R1); (*R1) = (*R2); (*R2) = T; + +} + 
+__attribute__((always_inline)) void +InvRad4B1(float2 *R0, float2 *R2, float2 *R1, float2 *R3) +{ + + float2 T; + + (*R1) = (*R0) - (*R1); + (*R0) = 2.0f * (*R0) - (*R1); + (*R3) = (*R2) - (*R3); + (*R2) = 2.0f * (*R2) - (*R3); + + (*R2) = (*R0) - (*R2); + (*R0) = 2.0f * (*R0) - (*R2); + (*R3) = (*R1) + (fvect2)((*R3).y, -(*R3).x); + (*R1) = 2.0f * (*R1) - (*R3); + + T = (*R1); (*R1) = (*R2); (*R2) = T; + +} + +__attribute__((always_inline)) void +FwdPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3) +{ + + + if(rw) + { + (*R0) = bufIn[inOffset + ( 0 + me*1 + 0 + 0 )*1]; + (*R1) = bufIn[inOffset + ( 0 + me*1 + 0 + 2 )*1]; + (*R2) = bufIn[inOffset + ( 0 + me*1 + 0 + 4 )*1]; + (*R3) = bufIn[inOffset + ( 0 + me*1 + 0 + 6 )*1]; + } + + + FwdRad4B1(R0, R1, R2, R3); + + + if(rw) + { + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 0 )*1] = (*R0).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 1 )*1] = (*R1).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 2 )*1] = (*R2).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 3 )*1] = (*R3).x; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + (*R0).x = bufOutRe[outOffset + ( 0 + me*2 + 0 + 0 )*1]; + (*R1).x = bufOutRe[outOffset + ( 0 + me*2 + 0 + 4 )*1]; + (*R2).x = bufOutRe[outOffset + ( 0 + me*2 + 1 + 0 )*1]; + (*R3).x = bufOutRe[outOffset + ( 0 + me*2 + 1 + 4 )*1]; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 0 )*1] = (*R0).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 1 )*1] = (*R1).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 2 )*1] = (*R2).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 3 )*1] = (*R3).y; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + (*R0).y = bufOutIm[outOffset + ( 0 + me*2 + 0 + 0 )*1]; + 
(*R1).y = bufOutIm[outOffset + ( 0 + me*2 + 0 + 4 )*1]; + (*R2).y = bufOutIm[outOffset + ( 0 + me*2 + 1 + 0 )*1]; + (*R3).y = bufOutIm[outOffset + ( 0 + me*2 + 1 + 4 )*1]; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + +} + +__attribute__((always_inline)) void +FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3) +{ + + + + { + float2 W = twiddles[3 + 1*((2*me + 0)%4) + 0]; + float TR, TI; + TR = (W.x * (*R1).x) - (W.y * (*R1).y); + TI = (W.y * (*R1).x) + (W.x * (*R1).y); + (*R1).x = TR; + (*R1).y = TI; + } + + { + float2 W = twiddles[3 + 1*((2*me + 1)%4) + 0]; + float TR, TI; + TR = (W.x * (*R3).x) - (W.y * (*R3).y); + TI = (W.y * (*R3).x) + (W.x * (*R3).y); + (*R3).x = TR; + (*R3).y = TI; + } + + FwdRad2B1(R0, R1); + FwdRad2B1(R2, R3); + + + if(rw) + { + __global float4 *buff4g = bufOut; + + buff4g[ 1*me + 0 + 0 ] = (float4)((*R0).x, (*R0).y, (*R2).x, (*R2).y) ; + buff4g[ 1*me + 0 + 2 ] = (float4)((*R1).x, (*R1).y, (*R3).x, (*R3).y) ; + } + +} + +__attribute__((always_inline)) void +InvPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float2 *bufIn, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3) +{ + + + if(rw) + { + (*R0) = bufIn[inOffset + ( 0 + me*1 + 0 + 0 )*1]; + (*R1) = bufIn[inOffset + ( 0 + me*1 + 0 + 2 )*1]; + (*R2) = bufIn[inOffset + ( 0 + me*1 + 0 + 4 )*1]; + (*R3) = bufIn[inOffset + ( 0 + me*1 + 0 + 6 )*1]; + } + + + InvRad4B1(R0, R1, R2, R3); + + + if(rw) + { + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 0 )*1] = (*R0).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 1 )*1] = (*R1).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 2 )*1] = (*R2).x; + bufOutRe[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 3 )*1] = (*R3).x; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + (*R0).x = bufOutRe[outOffset + ( 0 
+ me*2 + 0 + 0 )*1]; + (*R1).x = bufOutRe[outOffset + ( 0 + me*2 + 0 + 4 )*1]; + (*R2).x = bufOutRe[outOffset + ( 0 + me*2 + 1 + 0 )*1]; + (*R3).x = bufOutRe[outOffset + ( 0 + me*2 + 1 + 4 )*1]; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 0 )*1] = (*R0).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 1 )*1] = (*R1).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 2 )*1] = (*R2).y; + bufOutIm[outOffset + ( ((1*me + 0)/1)*4 + (1*me + 0)%1 + 3 )*1] = (*R3).y; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + + if(rw) + { + (*R0).y = bufOutIm[outOffset + ( 0 + me*2 + 0 + 0 )*1]; + (*R1).y = bufOutIm[outOffset + ( 0 + me*2 + 0 + 4 )*1]; + (*R2).y = bufOutIm[outOffset + ( 0 + me*2 + 1 + 0 )*1]; + (*R3).y = bufOutIm[outOffset + ( 0 + me*2 + 1 + 4 )*1]; + } + + + barrier(CLK_LOCAL_MEM_FENCE); + +} + +__attribute__((always_inline)) void +InvPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, float2 *R0, float2 *R1, float2 *R2, float2 *R3) +{ + + + + { + float2 W = twiddles[3 + 1*((2*me + 0)%4) + 0]; + float TR, TI; + TR = (W.x * (*R1).x) + (W.y * (*R1).y); + TI = -(W.y * (*R1).x) + (W.x * (*R1).y); + (*R1).x = TR; + (*R1).y = TI; + } + + { + float2 W = twiddles[3 + 1*((2*me + 1)%4) + 0]; + float TR, TI; + TR = (W.x * (*R3).x) + (W.y * (*R3).y); + TI = -(W.y * (*R3).x) + (W.x * (*R3).y); + (*R3).x = TR; + (*R3).y = TI; + } + + InvRad2B1(R0, R1); + InvRad2B1(R2, R3); + + + if(rw) + { + __global float4 *buff4g = bufOut; + + buff4g[ 1*me + 0 + 0 ] = (float4)((*R0).x, (*R0).y, (*R2).x, (*R2).y) * 1.2500000000000000e-01f; + buff4g[ 1*me + 0 + 2 ] = (float4)((*R1).x, (*R1).y, (*R3).x, (*R3).y) * 1.2500000000000000e-01f; + } + +} + + typedef union { uint u; int i; } cb_t; + +__kernel __attribute__((reqd_work_group_size (64,1,1))) +//void fft_fwd(__constant cb_t *cb __attribute__((max_constant_size(32))), 
__global const float2 * restrict gbIn, __global float2 * restrict gbOut) +void fft_fwd(__global const float2 * restrict gbIn, __global float2 * restrict gbOut) +{ + uint me = get_local_id(0); + uint batch = get_group_id(0); + + __local float lds[256]; + + uint iOffset; + uint oOffset; + __global float2 *lwbIn; + __global float2 *lwbOut; + + float2 R0, R1, R2, R3; + + //uint rw = (me < ((cb[0].u) - batch*32)*2) ? 1 : 0; + uint rw = (me < ((1) - batch*32)*2) ? 1 : 0; + + uint b = 0; + + iOffset = (batch*32 + (me/2))*8; + oOffset = (batch*32 + (me/2))*8; + lwbIn = gbIn + iOffset; + lwbOut = gbOut + oOffset; + + FwdPass0(rw, b, me%2, 0, (me/2)*8, lwbIn, lds, lds, &R0, &R1, &R2, &R3); + FwdPass1(rw, b, me%2, (me/2)*8, 0, lds, lds, lwbOut, &R0, &R1, &R2, &R3); +} + +__kernel __attribute__((reqd_work_group_size (64,1,1))) +//void fft_back(__constant cb_t *cb __attribute__((max_constant_size(32))), __global const float2 * restrict gbIn, __global float2 * restrict gbOut) +void fft_back(__global const float2 * restrict gbIn, __global float2 * restrict gbOut) +{ + uint me = get_local_id(0); + uint batch = get_group_id(0); + + __local float lds[256]; + + uint iOffset; + uint oOffset; + __global float2 *lwbIn; + __global float2 *lwbOut; + + float2 R0, R1, R2, R3; + + //uint rw = (me < ((cb[0].u) - batch*32)*2) ? 1 : 0; + uint rw = (me < ((1) - batch*32)*2) ? 
1 : 0; + + uint b = 0; + + iOffset = (batch*32 + (me/2))*8; + oOffset = (batch*32 + (me/2))*8; + lwbIn = gbIn + iOffset; + lwbOut = gbOut + oOffset; + + InvPass0(rw, b, me%2, 0, (me/2)*8, lwbIn, lds, lds, &R0, &R1, &R2, &R3); + InvPass1(rw, b, me%2, (me/2)*8, 0, lds, lds, lwbOut, &R0, &R1, &R2, &R3); +} + + diff --git a/RTCP/GPUProc/src/math.cl b/RTCP/GPUProc/src/math.cl new file mode 100644 index 0000000000000000000000000000000000000000..6b4d47579d85448b4d6d57cffc58368d4e4a13bd --- /dev/null +++ b/RTCP/GPUProc/src/math.cl @@ -0,0 +1,10 @@ +float2 cmul(float2 a, float2 b) +{ + return (float2) (a.x * b.x - a.y * b.y, a.x * b.y + a.y * b.x); +} + + +float2 cexp(float ang) +{ + return (float2) (native_cos(ang), native_sin(ang)); +} diff --git a/RTCP/GPUProc/src/octave-core b/RTCP/GPUProc/src/octave-core new file mode 100644 index 0000000000000000000000000000000000000000..608aba01c896492a0c4befa1f98dc1416cc6cf6c Binary files /dev/null and b/RTCP/GPUProc/src/octave-core differ diff --git a/RTCP/GPUProc/test/77_Stations.parset b/RTCP/GPUProc/test/77_Stations.parset new file mode 100644 index 0000000000000000000000000000000000000000..b97352b82f3713e14c8987cc93c41c994d417e73 --- /dev/null +++ b/RTCP/GPUProc/test/77_Stations.parset @@ -0,0 +1,177 @@ +OLAP.nrBitsPerSample = 8 +OLAP.nrTimesInFrame = 16 +Observation.nrSlotsInFrame = 122 +OLAP.nrSecondsOfBuffer = 1.2 +OLAP.CNProc.nrPPFTaps = 16 +OLAP.realTime = false +Observation.startTime = '2010-04-28 17:25:03' +Observation.stopTime = '2010-04-28 17:25:13' +OLAP.storageStationNames = [ST00 .. ST76] +Observation.subbandList = [12 .. 
499] +Observation.bandFilter = LBA_10_90 +Observation.channelsPerSubband = 64 +OLAP.CNProc.integrationSteps = 3072 # FIXME: rename +Observation.sampleClock = 200 +OLAP.correctBandPass = true +Observation.beamList = [488 * 0] +OLAP.delayCompensation = true +OLAP.CNProc.partition = PartitionName +PIC.Core.IONProc.PartitionName[0].inputs = [\ +ST00/RSP0..ST00/RSP3,\ +ST01/RSP0..ST01/RSP3,\ +ST02/RSP0..ST02/RSP3,\ +ST03/RSP0..ST03/RSP3,\ +ST04/RSP0..ST04/RSP3,\ +ST05/RSP0..ST05/RSP3,\ +ST06/RSP0..ST06/RSP3,\ +ST07/RSP0..ST07/RSP3,\ +ST08/RSP0..ST08/RSP3,\ +ST09/RSP0..ST09/RSP3,\ +ST10/RSP0..ST10/RSP3,\ +ST11/RSP0..ST11/RSP3,\ +ST12/RSP0..ST12/RSP3,\ +ST13/RSP0..ST13/RSP3,\ +ST14/RSP0..ST14/RSP3,\ +ST15/RSP0..ST15/RSP3,\ +ST16/RSP0..ST16/RSP3,\ +ST17/RSP0..ST17/RSP3,\ +ST18/RSP0..ST18/RSP3,\ +ST19/RSP0..ST19/RSP3,\ +ST20/RSP0..ST20/RSP3,\ +ST21/RSP0..ST21/RSP3,\ +ST22/RSP0..ST22/RSP3,\ +ST23/RSP0..ST23/RSP3,\ +ST24/RSP0..ST24/RSP3,\ +ST25/RSP0..ST25/RSP3,\ +ST26/RSP0..ST26/RSP3,\ +ST27/RSP0..ST27/RSP3,\ +ST28/RSP0..ST28/RSP3,\ +ST29/RSP0..ST29/RSP3,\ +ST30/RSP0..ST30/RSP3,\ +ST31/RSP0..ST31/RSP3,\ +ST32/RSP0..ST32/RSP3,\ +ST33/RSP0..ST33/RSP3,\ +ST34/RSP0..ST34/RSP3,\ +ST35/RSP0..ST35/RSP3,\ +ST36/RSP0..ST36/RSP3,\ +ST37/RSP0..ST37/RSP3,\ +ST38/RSP0..ST38/RSP3,\ +ST39/RSP0..ST39/RSP3,\ +ST40/RSP0..ST40/RSP3,\ +ST41/RSP0..ST41/RSP3,\ +ST42/RSP0..ST42/RSP3,\ +ST43/RSP0..ST43/RSP3,\ +ST44/RSP0..ST44/RSP3,\ +ST45/RSP0..ST45/RSP3,\ +ST46/RSP0..ST46/RSP3,\ +ST47/RSP0..ST47/RSP3,\ +ST48/RSP0..ST48/RSP3,\ +ST49/RSP0..ST49/RSP3,\ +ST50/RSP0..ST50/RSP3,\ +ST51/RSP0..ST51/RSP3,\ +ST52/RSP0..ST52/RSP3,\ +ST53/RSP0..ST53/RSP3,\ +ST54/RSP0..ST54/RSP3,\ +ST55/RSP0..ST55/RSP3,\ +ST56/RSP0..ST56/RSP3,\ +ST57/RSP0..ST57/RSP3,\ +ST58/RSP0..ST58/RSP3,\ +ST59/RSP0..ST59/RSP3,\ +ST60/RSP0..ST60/RSP3,\ +ST61/RSP0..ST61/RSP3,\ +ST62/RSP0..ST62/RSP3,\ +ST63/RSP0..ST63/RSP3,\ +ST64/RSP0..ST64/RSP3,\ +ST65/RSP0..ST65/RSP3,\ +ST66/RSP0..ST66/RSP3,\ +ST67/RSP0..ST67/RSP3,\ +ST68/RSP0..ST68/RSP3,\ 
+ST69/RSP0..ST69/RSP3,\ +ST70/RSP0..ST70/RSP3,\ +ST71/RSP0..ST71/RSP3,\ +ST72/RSP0..ST72/RSP3,\ +ST73/RSP0..ST73/RSP3,\ +ST74/RSP0..ST74/RSP3,\ +ST75/RSP0..ST75/RSP3,\ +ST76/RSP0..ST76/RSP3] +PIC.Core.Station.ST00.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST01.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST02.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST03.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST04.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST05.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST06.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST07.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST08.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST09.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST10.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST11.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST12.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST13.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST14.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST15.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST16.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST17.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST18.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST19.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST20.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST21.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST22.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST23.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST24.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST25.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST26.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST27.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST28.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST29.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST30.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST31.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST32.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST33.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST34.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST35.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST36.RSP.ports = [ 4 * 
null: ] +PIC.Core.Station.ST37.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST38.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST39.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST40.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST41.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST42.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST43.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST44.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST45.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST46.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST47.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST48.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST49.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST50.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST51.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST52.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST53.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST54.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST55.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST56.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST57.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST58.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST59.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST60.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST61.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST62.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST63.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST64.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST65.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST66.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST67.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST68.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST69.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST70.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST71.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST72.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST73.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST74.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST75.RSP.ports = [ 4 * null: ] +PIC.Core.Station.ST76.RSP.ports = [ 4 * null: ] 
+Observation.Beam[0].nrTiedArrayBeams = 1 # FIXME: conditional +OLAP.CNProc_CoherentStokes.timeIntegrationFactor = 1 # FIXME: conditional +OLAP.CNProc_IncoherentStokes.timeIntegrationFactor = 1 # FIXME: conditional +OLAP.tiedArrayStationNames = [ST00 .. ST76] # FIXME???: conditional diff --git a/RTCP/GPUProc/test/AARTFAAC.parset b/RTCP/GPUProc/test/AARTFAAC.parset new file mode 100644 index 0000000000000000000000000000000000000000..b9282b23328f949ee615ec144f65b1ad38b48ce4 --- /dev/null +++ b/RTCP/GPUProc/test/AARTFAAC.parset @@ -0,0 +1,2119 @@ +OLAP.CNProc.integrationSteps = 1536 +OLAP.CNProc.phaseOnePsets = [0..287] +#OLAP.CNProc.phaseTwoPsets = [0..4] +OLAP.CNProc.phaseTwoPsets = [0] +OLAP.CNProc.phaseThreePsets = [] +#OLAP.CNProc.phaseOneTwoCores = [0] +OLAP.CNProc.phaseOneTwoCores = [0..19] +OLAP.CNProc.phaseThreeCores = [] +OLAP.CNProc.partition = PartitionName +OLAP.CNProc.tabList = [] +OLAP.realTime = F +OLAP.maxNetworkDelay = .1 +OLAP.correctClocks = T +OLAP.correctBandPass = T +OLAP.nrBitsPerSample = 8 +OLAP.nrTimesInFrame = 16 +OLAP.nrSecondsOfBuffer = 3.5 +OLAP.CNProc.nrPPFTaps = 16 +OLAP.Storage.userName = romein +OLAP.Storage.sshIdentityFile = /home/romein/.ssh/id_rsa +OLAP.Storage.msWriter = /home/romein/tmp/build/gnu_opt/RTCP/Storage/src/Storage_main +OLAP.storageNodeList = [288*0] +OLAP.OLAP_Conn.IONProc_Storage_Ports = [8300..9000] +OLAP.OLAP_Conn.IONProc_Storage_Transport = TCP +OLAP.OLAP_Conn.rawDataOutputOnly = F +OLAP.storageStationNames = 
[CS002D00,CS002D01,CS002D02,CS002D03,CS002D04,CS002D05,CS002D06,CS002D07,CS002D08,CS002D09,CS002D10,CS002D11,CS002D12,CS002D13,CS002D14,CS002D15,CS002D16,CS002D17,CS002D18,CS002D19,CS002D20,CS002D21,CS002D22,CS002D23,CS002D24,CS002D25,CS002D26,CS002D27,CS002D28,CS002D29,CS002D30,CS002D31,CS002D32,CS002D33,CS002D34,CS002D35,CS002D36,CS002D37,CS002D38,CS002D39,CS002D40,CS002D41,CS002D42,CS002D43,CS002D44,CS002D45,CS002D46,CS002D47,CS003D00,CS003D01,CS003D02,CS003D03,CS003D04,CS003D05,CS003D06,CS003D07,CS003D08,CS003D09,CS003D10,CS003D11,CS003D12,CS003D13,CS003D14,CS003D15,CS003D16,CS003D17,CS003D18,CS003D19,CS003D20,CS003D21,CS003D22,CS003D23,CS003D24,CS003D25,CS003D26,CS003D27,CS003D28,CS003D29,CS003D30,CS003D31,CS003D32,CS003D33,CS003D34,CS003D35,CS003D36,CS003D37,CS003D38,CS003D39,CS003D40,CS003D41,CS003D42,CS003D43,CS003D44,CS003D45,CS003D46,CS003D47,CS004D00,CS004D01,CS004D02,CS004D03,CS004D04,CS004D05,CS004D06,CS004D07,CS004D08,CS004D09,CS004D10,CS004D11,CS004D12,CS004D13,CS004D14,CS004D15,CS004D16,CS004D17,CS004D18,CS004D19,CS004D20,CS004D21,CS004D22,CS004D23,CS004D24,CS004D25,CS004D26,CS004D27,CS004D28,CS004D29,CS004D30,CS004D31,CS004D32,CS004D33,CS004D34,CS004D35,CS004D36,CS004D37,CS004D38,CS004D39,CS004D40,CS004D41,CS004D42,CS004D43,CS004D44,CS004D45,CS004D46,CS004D47,CS005D00,CS005D01,CS005D02,CS005D03,CS005D04,CS005D05,CS005D06,CS005D07,CS005D08,CS005D09,CS005D10,CS005D11,CS005D12,CS005D13,CS005D14,CS005D15,CS005D16,CS005D17,CS005D18,CS005D19,CS005D20,CS005D21,CS005D22,CS005D23,CS005D24,CS005D25,CS005D26,CS005D27,CS005D28,CS005D29,CS005D30,CS005D31,CS005D32,CS005D33,CS005D34,CS005D35,CS005D36,CS005D37,CS005D38,CS005D39,CS005D40,CS005D41,CS005D42,CS005D43,CS005D44,CS005D45,CS005D46,CS005D47,CS006D00,CS006D01,CS006D02,CS006D03,CS006D04,CS006D05,CS006D06,CS006D07,CS006D08,CS006D09,CS006D10,CS006D11,CS006D12,CS006D13,CS006D14,CS006D15,CS006D16,CS006D17,CS006D18,CS006D19,CS006D20,CS006D21,CS006D22,CS006D23,CS006D24,CS006D25,CS006D26,CS006D27,CS006D28,CS006D29,C
S006D30,CS006D31,CS006D32,CS006D33,CS006D34,CS006D35,CS006D36,CS006D37,CS006D38,CS006D39,CS006D40,CS006D41,CS006D42,CS006D43,CS006D44,CS006D45,CS006D46,CS006D47,CS006D00,CS006D01,CS006D02,CS006D03,CS006D04,CS006D05,CS006D06,CS006D07,CS006D08,CS006D09,CS006D10,CS006D11,CS006D12,CS006D13,CS006D14,CS006D15,CS006D16,CS006D17,CS006D18,CS006D19,CS006D20,CS006D21,CS006D22,CS006D23,CS006D24,CS006D25,CS006D26,CS006D27,CS006D28,CS006D29,CS006D30,CS006D31,CS006D32,CS006D33,CS006D34,CS006D35,CS006D36,CS006D37,CS006D38,CS006D39,CS006D40,CS006D41,CS006D42,CS006D43,CS006D44,CS006D45,CS006D46,CS006D47] +OLAP.tiedArrayStationNames = [] +Observation.Beam[0].nrTiedArrayBeams = 2 +Observation.Beam[0].TiedArrayBeam[0].angle1 = 1.1 +Observation.Beam[0].TiedArrayBeam[0].angle2 = 1.2 +Observation.Beam[0].TiedArrayBeam[1].angle1 = 1.3 +Observation.Beam[0].TiedArrayBeam[1].angle2 = 1.4 +OLAP.IONProc.integrationSteps = 2 +OLAP.CNProc_CoherentStokes.timeIntegrationFactor = 1 +OLAP.CNProc_IncoherentStokes.timeIntegrationFactor = 1 +OLAP.CNProc_CoherentStokes.channelsPerSubband = 16 +OLAP.CNProc_IncoherentStokes.channelsPerSubband = 16 +OLAP.CNProc_CoherentStokes.which = I # IQUV +OLAP.CNProc_IncoherentStokes.which = I # IQUV +OLAP.PencilInfo.storageNodeList = [] +OLAP.delayCompensation = T +OLAP.DelayComp.positionType = ITRF +OLAP.DelayComp.nrCalcDelays = 16 +OLAP.dispersionMeasure = 2 +Observation.ObserverName = John +Observation.Campaign.name = Test +Observation.Campaign.title = dr. 
+Observation.Campaign.contact = Jan David +Observation.Campaign.PI = John +Observation.Campaign.CO_I = John +Observation.ProjectName = AARTFAAC +Observation.nrPolarisations = 2 +Observation.antennaSet = LBA_OUTER +Observation.bandFilter = LBA_30_90 +Observation.subbandList = [306..315] +Observation.beamList = [10*0] +Observation.rspBoardList = [10*0] +Observation.rspSlotList = [0..9] +Observation.channelsPerSubband = 64 +Observation.sampleClock = 200 +Observation.nrSlotsInFrame = 10 +Observation.ObsID = 1000000 +Observation.startTime = '2010-04-28 17:25:03' +#Observation.stopTime = '2010-04-28 17:26:03' +Observation.stopTime = '2010-04-28 17:25:13' +Observation.nrBeams = 1 +Observation.AnaBeam[0].directionType = J2000 +Observation.AnaBeam[0].angle1 = 5.2336866848083394 # Cygnus +Observation.AnaBeam[0].angle2 = 0.71094251447010637 +Observation.AnaBeam[0].target = Cygnus A +Observation.Beam[0].directionType = J2000 +Observation.Beam[0].angle1 = 5.2336866848083394 # Cygnus +Observation.Beam[0].angle2 = 0.71094251447010637 +Observation.Beam[0].target = Cygnus A +#Observation.MSNameMask = /tmp/romein${RAID}/L${YEAR}_${MSNUMBER}/SB${SUBBAND}.MS +OLAP.Storage.hosts = [localhost] +Observation.DataProducts.Output_FilteredData.enabled = F +Observation.DataProducts.Output_FilteredData.locations = [10*localhost:/tmp] +Observation.DataProducts.Output_FilteredData.filenames = [SB000.filt,SB001.filt,SB002.filt,SB003.filt,SB004.filt] +Observation.DataProducts.Output_Correlated.enabled = T +Observation.DataProducts.Output_Correlated.locations = [10*localhost:/var/tmp/romein/AARTFAAC-dataset] +Observation.DataProducts.Output_Correlated.filenames = [SB000.MS,SB001.MS,SB002.MS,SB003.MS,SB004.MS] +Observation.DataProducts.Output_IncoherentStokes.enabled = F +Observation.DataProducts.Output_IncoherentStokes.locations = [10*localhost:/tmp] +Observation.DataProducts.Output_IncoherentStokes.filenames = [SB000.incStokes,SB001.incStokes,SB002.incStokes,SB003.incStokes,SB004.incStokes] 
+Observation.DataProducts.Output_Beamformed.enabled = F +Observation.DataProducts.Output_Beamformed.locations = [4*localhost:/tmp] +Observation.DataProducts.Output_Beamformed.filenames = [CV001.X,CV001.Y,CV002.X,CV002.Y] +#Observation.DataProducts.Output_Beamformed.locations = [2*localhost:/tmp] +#Observation.DataProducts.Output_Beamformed.filenames = [CV001.X,CV001.Y] +Observation.DataProducts.Output_CoherentStokes.enabled = F +Observation.DataProducts.Output_CoherentStokes.locations = [2*localhost:/tmp] +Observation.DataProducts.Output_CoherentStokes.filenames = [Coherent.0.I,Coherent.1.I] +#Observation.DataProducts.Output_CoherentStokes.locations = [8*localhost:/tmp] +#Observation.DataProducts.Output_CoherentStokes.filenames = [Coherent.0.I,Coherent.0.Q,Coherent.0.U,Coherent.0.V,Coherent.1.I,Coherent.1.Q,Coherent.1.U,Coherent.1.V] +Observation.DataProducts.Output_Trigger.enabled = F +Observation.DataProducts.Output_Trigger.locations = [] +Observation.DataProducts.Output_Trigger.filenames = [] +OLAP.Storage.subbandsPerPart = 5 +OLAP.Storage.partsPerStokes = 1 +PIC.Core.IONProc.PartitionName[0].inputs = [CS002D00/RSP0] +PIC.Core.IONProc.PartitionName[1].inputs = [CS002D01/RSP0] +PIC.Core.IONProc.PartitionName[2].inputs = [CS002D02/RSP0] +PIC.Core.IONProc.PartitionName[3].inputs = [CS002D03/RSP0] +PIC.Core.IONProc.PartitionName[4].inputs = [CS002D04/RSP0] +PIC.Core.IONProc.PartitionName[5].inputs = [CS002D05/RSP0] +PIC.Core.IONProc.PartitionName[6].inputs = [CS002D06/RSP0] +PIC.Core.IONProc.PartitionName[7].inputs = [CS002D07/RSP0] +PIC.Core.IONProc.PartitionName[8].inputs = [CS002D08/RSP0] +PIC.Core.IONProc.PartitionName[9].inputs = [CS002D09/RSP0] +PIC.Core.IONProc.PartitionName[10].inputs = [CS002D10/RSP0] +PIC.Core.IONProc.PartitionName[11].inputs = [CS002D11/RSP0] +PIC.Core.IONProc.PartitionName[12].inputs = [CS002D12/RSP0] +PIC.Core.IONProc.PartitionName[13].inputs = [CS002D13/RSP0] +PIC.Core.IONProc.PartitionName[14].inputs = [CS002D14/RSP0] 
+PIC.Core.IONProc.PartitionName[15].inputs = [CS002D15/RSP0] +PIC.Core.IONProc.PartitionName[16].inputs = [CS002D16/RSP0] +PIC.Core.IONProc.PartitionName[17].inputs = [CS002D17/RSP0] +PIC.Core.IONProc.PartitionName[18].inputs = [CS002D18/RSP0] +PIC.Core.IONProc.PartitionName[19].inputs = [CS002D19/RSP0] +PIC.Core.IONProc.PartitionName[20].inputs = [CS002D20/RSP0] +PIC.Core.IONProc.PartitionName[21].inputs = [CS002D21/RSP0] +PIC.Core.IONProc.PartitionName[22].inputs = [CS002D22/RSP0] +PIC.Core.IONProc.PartitionName[23].inputs = [CS002D23/RSP0] +PIC.Core.IONProc.PartitionName[24].inputs = [CS002D24/RSP0] +PIC.Core.IONProc.PartitionName[25].inputs = [CS002D25/RSP0] +PIC.Core.IONProc.PartitionName[26].inputs = [CS002D26/RSP0] +PIC.Core.IONProc.PartitionName[27].inputs = [CS002D27/RSP0] +PIC.Core.IONProc.PartitionName[28].inputs = [CS002D28/RSP0] +PIC.Core.IONProc.PartitionName[29].inputs = [CS002D29/RSP0] +PIC.Core.IONProc.PartitionName[30].inputs = [CS002D30/RSP0] +PIC.Core.IONProc.PartitionName[31].inputs = [CS002D31/RSP0] +PIC.Core.IONProc.PartitionName[32].inputs = [CS002D32/RSP0] +PIC.Core.IONProc.PartitionName[33].inputs = [CS002D33/RSP0] +PIC.Core.IONProc.PartitionName[34].inputs = [CS002D34/RSP0] +PIC.Core.IONProc.PartitionName[35].inputs = [CS002D35/RSP0] +PIC.Core.IONProc.PartitionName[36].inputs = [CS002D36/RSP0] +PIC.Core.IONProc.PartitionName[37].inputs = [CS002D37/RSP0] +PIC.Core.IONProc.PartitionName[38].inputs = [CS002D38/RSP0] +PIC.Core.IONProc.PartitionName[39].inputs = [CS002D39/RSP0] +PIC.Core.IONProc.PartitionName[40].inputs = [CS002D40/RSP0] +PIC.Core.IONProc.PartitionName[41].inputs = [CS002D41/RSP0] +PIC.Core.IONProc.PartitionName[42].inputs = [CS002D42/RSP0] +PIC.Core.IONProc.PartitionName[43].inputs = [CS002D43/RSP0] +PIC.Core.IONProc.PartitionName[44].inputs = [CS002D44/RSP0] +PIC.Core.IONProc.PartitionName[45].inputs = [CS002D45/RSP0] +PIC.Core.IONProc.PartitionName[46].inputs = [CS002D46/RSP0] +PIC.Core.IONProc.PartitionName[47].inputs = 
[CS002D47/RSP0] +PIC.Core.IONProc.PartitionName[48].inputs = [CS003D00/RSP0] +PIC.Core.IONProc.PartitionName[49].inputs = [CS003D01/RSP0] +PIC.Core.IONProc.PartitionName[50].inputs = [CS003D02/RSP0] +PIC.Core.IONProc.PartitionName[51].inputs = [CS003D03/RSP0] +PIC.Core.IONProc.PartitionName[52].inputs = [CS003D04/RSP0] +PIC.Core.IONProc.PartitionName[53].inputs = [CS003D05/RSP0] +PIC.Core.IONProc.PartitionName[54].inputs = [CS003D06/RSP0] +PIC.Core.IONProc.PartitionName[55].inputs = [CS003D07/RSP0] +PIC.Core.IONProc.PartitionName[56].inputs = [CS003D08/RSP0] +PIC.Core.IONProc.PartitionName[57].inputs = [CS003D09/RSP0] +PIC.Core.IONProc.PartitionName[58].inputs = [CS003D10/RSP0] +PIC.Core.IONProc.PartitionName[59].inputs = [CS003D11/RSP0] +PIC.Core.IONProc.PartitionName[60].inputs = [CS003D12/RSP0] +PIC.Core.IONProc.PartitionName[61].inputs = [CS003D13/RSP0] +PIC.Core.IONProc.PartitionName[62].inputs = [CS003D14/RSP0] +PIC.Core.IONProc.PartitionName[63].inputs = [CS003D15/RSP0] +PIC.Core.IONProc.PartitionName[64].inputs = [CS003D16/RSP0] +PIC.Core.IONProc.PartitionName[65].inputs = [CS003D17/RSP0] +PIC.Core.IONProc.PartitionName[66].inputs = [CS003D18/RSP0] +PIC.Core.IONProc.PartitionName[67].inputs = [CS003D19/RSP0] +PIC.Core.IONProc.PartitionName[68].inputs = [CS003D20/RSP0] +PIC.Core.IONProc.PartitionName[69].inputs = [CS003D21/RSP0] +PIC.Core.IONProc.PartitionName[70].inputs = [CS003D22/RSP0] +PIC.Core.IONProc.PartitionName[71].inputs = [CS003D23/RSP0] +PIC.Core.IONProc.PartitionName[72].inputs = [CS003D24/RSP0] +PIC.Core.IONProc.PartitionName[73].inputs = [CS003D25/RSP0] +PIC.Core.IONProc.PartitionName[74].inputs = [CS003D26/RSP0] +PIC.Core.IONProc.PartitionName[75].inputs = [CS003D27/RSP0] +PIC.Core.IONProc.PartitionName[76].inputs = [CS003D28/RSP0] +PIC.Core.IONProc.PartitionName[77].inputs = [CS003D29/RSP0] +PIC.Core.IONProc.PartitionName[78].inputs = [CS003D30/RSP0] +PIC.Core.IONProc.PartitionName[79].inputs = [CS003D31/RSP0] 
+PIC.Core.IONProc.PartitionName[80].inputs = [CS003D32/RSP0] +PIC.Core.IONProc.PartitionName[81].inputs = [CS003D33/RSP0] +PIC.Core.IONProc.PartitionName[82].inputs = [CS003D34/RSP0] +PIC.Core.IONProc.PartitionName[83].inputs = [CS003D35/RSP0] +PIC.Core.IONProc.PartitionName[84].inputs = [CS003D36/RSP0] +PIC.Core.IONProc.PartitionName[85].inputs = [CS003D37/RSP0] +PIC.Core.IONProc.PartitionName[86].inputs = [CS003D38/RSP0] +PIC.Core.IONProc.PartitionName[87].inputs = [CS003D39/RSP0] +PIC.Core.IONProc.PartitionName[88].inputs = [CS003D40/RSP0] +PIC.Core.IONProc.PartitionName[89].inputs = [CS003D41/RSP0] +PIC.Core.IONProc.PartitionName[90].inputs = [CS003D42/RSP0] +PIC.Core.IONProc.PartitionName[91].inputs = [CS003D43/RSP0] +PIC.Core.IONProc.PartitionName[92].inputs = [CS003D44/RSP0] +PIC.Core.IONProc.PartitionName[93].inputs = [CS003D45/RSP0] +PIC.Core.IONProc.PartitionName[94].inputs = [CS003D46/RSP0] +PIC.Core.IONProc.PartitionName[95].inputs = [CS003D47/RSP0] +PIC.Core.IONProc.PartitionName[96].inputs = [CS004D00/RSP0] +PIC.Core.IONProc.PartitionName[97].inputs = [CS004D01/RSP0] +PIC.Core.IONProc.PartitionName[98].inputs = [CS004D02/RSP0] +PIC.Core.IONProc.PartitionName[99].inputs = [CS004D03/RSP0] +PIC.Core.IONProc.PartitionName[100].inputs = [CS004D04/RSP0] +PIC.Core.IONProc.PartitionName[101].inputs = [CS004D05/RSP0] +PIC.Core.IONProc.PartitionName[102].inputs = [CS004D06/RSP0] +PIC.Core.IONProc.PartitionName[103].inputs = [CS004D07/RSP0] +PIC.Core.IONProc.PartitionName[104].inputs = [CS004D08/RSP0] +PIC.Core.IONProc.PartitionName[105].inputs = [CS004D09/RSP0] +PIC.Core.IONProc.PartitionName[106].inputs = [CS004D10/RSP0] +PIC.Core.IONProc.PartitionName[107].inputs = [CS004D11/RSP0] +PIC.Core.IONProc.PartitionName[108].inputs = [CS004D12/RSP0] +PIC.Core.IONProc.PartitionName[109].inputs = [CS004D13/RSP0] +PIC.Core.IONProc.PartitionName[110].inputs = [CS004D14/RSP0] +PIC.Core.IONProc.PartitionName[111].inputs = [CS004D15/RSP0] 
+PIC.Core.IONProc.PartitionName[112].inputs = [CS004D16/RSP0] +PIC.Core.IONProc.PartitionName[113].inputs = [CS004D17/RSP0] +PIC.Core.IONProc.PartitionName[114].inputs = [CS004D18/RSP0] +PIC.Core.IONProc.PartitionName[115].inputs = [CS004D19/RSP0] +PIC.Core.IONProc.PartitionName[116].inputs = [CS004D20/RSP0] +PIC.Core.IONProc.PartitionName[117].inputs = [CS004D21/RSP0] +PIC.Core.IONProc.PartitionName[118].inputs = [CS004D22/RSP0] +PIC.Core.IONProc.PartitionName[119].inputs = [CS004D23/RSP0] +PIC.Core.IONProc.PartitionName[120].inputs = [CS004D24/RSP0] +PIC.Core.IONProc.PartitionName[121].inputs = [CS004D25/RSP0] +PIC.Core.IONProc.PartitionName[122].inputs = [CS004D26/RSP0] +PIC.Core.IONProc.PartitionName[123].inputs = [CS004D27/RSP0] +PIC.Core.IONProc.PartitionName[124].inputs = [CS004D28/RSP0] +PIC.Core.IONProc.PartitionName[125].inputs = [CS004D29/RSP0] +PIC.Core.IONProc.PartitionName[126].inputs = [CS004D30/RSP0] +PIC.Core.IONProc.PartitionName[127].inputs = [CS004D31/RSP0] +PIC.Core.IONProc.PartitionName[128].inputs = [CS004D32/RSP0] +PIC.Core.IONProc.PartitionName[129].inputs = [CS004D33/RSP0] +PIC.Core.IONProc.PartitionName[130].inputs = [CS004D34/RSP0] +PIC.Core.IONProc.PartitionName[131].inputs = [CS004D35/RSP0] +PIC.Core.IONProc.PartitionName[132].inputs = [CS004D36/RSP0] +PIC.Core.IONProc.PartitionName[133].inputs = [CS004D37/RSP0] +PIC.Core.IONProc.PartitionName[134].inputs = [CS004D38/RSP0] +PIC.Core.IONProc.PartitionName[135].inputs = [CS004D39/RSP0] +PIC.Core.IONProc.PartitionName[136].inputs = [CS004D40/RSP0] +PIC.Core.IONProc.PartitionName[137].inputs = [CS004D41/RSP0] +PIC.Core.IONProc.PartitionName[138].inputs = [CS004D42/RSP0] +PIC.Core.IONProc.PartitionName[139].inputs = [CS004D43/RSP0] +PIC.Core.IONProc.PartitionName[140].inputs = [CS004D44/RSP0] +PIC.Core.IONProc.PartitionName[141].inputs = [CS004D45/RSP0] +PIC.Core.IONProc.PartitionName[142].inputs = [CS004D46/RSP0] +PIC.Core.IONProc.PartitionName[143].inputs = [CS004D47/RSP0] 
+PIC.Core.IONProc.PartitionName[144].inputs = [CS005D00/RSP0] +PIC.Core.IONProc.PartitionName[145].inputs = [CS005D01/RSP0] +PIC.Core.IONProc.PartitionName[146].inputs = [CS005D02/RSP0] +PIC.Core.IONProc.PartitionName[147].inputs = [CS005D03/RSP0] +PIC.Core.IONProc.PartitionName[148].inputs = [CS005D04/RSP0] +PIC.Core.IONProc.PartitionName[149].inputs = [CS005D05/RSP0] +PIC.Core.IONProc.PartitionName[150].inputs = [CS005D06/RSP0] +PIC.Core.IONProc.PartitionName[151].inputs = [CS005D07/RSP0] +PIC.Core.IONProc.PartitionName[152].inputs = [CS005D08/RSP0] +PIC.Core.IONProc.PartitionName[153].inputs = [CS005D09/RSP0] +PIC.Core.IONProc.PartitionName[154].inputs = [CS005D10/RSP0] +PIC.Core.IONProc.PartitionName[155].inputs = [CS005D11/RSP0] +PIC.Core.IONProc.PartitionName[156].inputs = [CS005D12/RSP0] +PIC.Core.IONProc.PartitionName[157].inputs = [CS005D13/RSP0] +PIC.Core.IONProc.PartitionName[158].inputs = [CS005D14/RSP0] +PIC.Core.IONProc.PartitionName[159].inputs = [CS005D15/RSP0] +PIC.Core.IONProc.PartitionName[160].inputs = [CS005D16/RSP0] +PIC.Core.IONProc.PartitionName[161].inputs = [CS005D17/RSP0] +PIC.Core.IONProc.PartitionName[162].inputs = [CS005D18/RSP0] +PIC.Core.IONProc.PartitionName[163].inputs = [CS005D19/RSP0] +PIC.Core.IONProc.PartitionName[164].inputs = [CS005D20/RSP0] +PIC.Core.IONProc.PartitionName[165].inputs = [CS005D21/RSP0] +PIC.Core.IONProc.PartitionName[166].inputs = [CS005D22/RSP0] +PIC.Core.IONProc.PartitionName[167].inputs = [CS005D23/RSP0] +PIC.Core.IONProc.PartitionName[168].inputs = [CS005D24/RSP0] +PIC.Core.IONProc.PartitionName[169].inputs = [CS005D25/RSP0] +PIC.Core.IONProc.PartitionName[170].inputs = [CS005D26/RSP0] +PIC.Core.IONProc.PartitionName[171].inputs = [CS005D27/RSP0] +PIC.Core.IONProc.PartitionName[172].inputs = [CS005D28/RSP0] +PIC.Core.IONProc.PartitionName[173].inputs = [CS005D29/RSP0] +PIC.Core.IONProc.PartitionName[174].inputs = [CS005D30/RSP0] +PIC.Core.IONProc.PartitionName[175].inputs = [CS005D31/RSP0] 
+PIC.Core.IONProc.PartitionName[176].inputs = [CS005D32/RSP0] +PIC.Core.IONProc.PartitionName[177].inputs = [CS005D33/RSP0] +PIC.Core.IONProc.PartitionName[178].inputs = [CS005D34/RSP0] +PIC.Core.IONProc.PartitionName[179].inputs = [CS005D35/RSP0] +PIC.Core.IONProc.PartitionName[180].inputs = [CS005D36/RSP0] +PIC.Core.IONProc.PartitionName[181].inputs = [CS005D37/RSP0] +PIC.Core.IONProc.PartitionName[182].inputs = [CS005D38/RSP0] +PIC.Core.IONProc.PartitionName[183].inputs = [CS005D39/RSP0] +PIC.Core.IONProc.PartitionName[184].inputs = [CS005D40/RSP0] +PIC.Core.IONProc.PartitionName[185].inputs = [CS005D41/RSP0] +PIC.Core.IONProc.PartitionName[186].inputs = [CS005D42/RSP0] +PIC.Core.IONProc.PartitionName[187].inputs = [CS005D43/RSP0] +PIC.Core.IONProc.PartitionName[188].inputs = [CS005D44/RSP0] +PIC.Core.IONProc.PartitionName[189].inputs = [CS005D45/RSP0] +PIC.Core.IONProc.PartitionName[190].inputs = [CS005D46/RSP0] +PIC.Core.IONProc.PartitionName[191].inputs = [CS005D47/RSP0] +PIC.Core.IONProc.PartitionName[192].inputs = [CS006D00/RSP0] +PIC.Core.IONProc.PartitionName[193].inputs = [CS006D01/RSP0] +PIC.Core.IONProc.PartitionName[194].inputs = [CS006D02/RSP0] +PIC.Core.IONProc.PartitionName[195].inputs = [CS006D03/RSP0] +PIC.Core.IONProc.PartitionName[196].inputs = [CS006D04/RSP0] +PIC.Core.IONProc.PartitionName[197].inputs = [CS006D05/RSP0] +PIC.Core.IONProc.PartitionName[198].inputs = [CS006D06/RSP0] +PIC.Core.IONProc.PartitionName[199].inputs = [CS006D07/RSP0] +PIC.Core.IONProc.PartitionName[200].inputs = [CS006D08/RSP0] +PIC.Core.IONProc.PartitionName[201].inputs = [CS006D09/RSP0] +PIC.Core.IONProc.PartitionName[202].inputs = [CS006D10/RSP0] +PIC.Core.IONProc.PartitionName[203].inputs = [CS006D11/RSP0] +PIC.Core.IONProc.PartitionName[204].inputs = [CS006D12/RSP0] +PIC.Core.IONProc.PartitionName[205].inputs = [CS006D13/RSP0] +PIC.Core.IONProc.PartitionName[206].inputs = [CS006D14/RSP0] +PIC.Core.IONProc.PartitionName[207].inputs = [CS006D15/RSP0] 
+PIC.Core.IONProc.PartitionName[208].inputs = [CS006D16/RSP0] +PIC.Core.IONProc.PartitionName[209].inputs = [CS006D17/RSP0] +PIC.Core.IONProc.PartitionName[210].inputs = [CS006D18/RSP0] +PIC.Core.IONProc.PartitionName[211].inputs = [CS006D19/RSP0] +PIC.Core.IONProc.PartitionName[212].inputs = [CS006D20/RSP0] +PIC.Core.IONProc.PartitionName[213].inputs = [CS006D21/RSP0] +PIC.Core.IONProc.PartitionName[214].inputs = [CS006D22/RSP0] +PIC.Core.IONProc.PartitionName[215].inputs = [CS006D23/RSP0] +PIC.Core.IONProc.PartitionName[216].inputs = [CS006D24/RSP0] +PIC.Core.IONProc.PartitionName[217].inputs = [CS006D25/RSP0] +PIC.Core.IONProc.PartitionName[218].inputs = [CS006D26/RSP0] +PIC.Core.IONProc.PartitionName[219].inputs = [CS006D27/RSP0] +PIC.Core.IONProc.PartitionName[220].inputs = [CS006D28/RSP0] +PIC.Core.IONProc.PartitionName[221].inputs = [CS006D29/RSP0] +PIC.Core.IONProc.PartitionName[222].inputs = [CS006D30/RSP0] +PIC.Core.IONProc.PartitionName[223].inputs = [CS006D31/RSP0] +PIC.Core.IONProc.PartitionName[224].inputs = [CS006D32/RSP0] +PIC.Core.IONProc.PartitionName[225].inputs = [CS006D33/RSP0] +PIC.Core.IONProc.PartitionName[226].inputs = [CS006D34/RSP0] +PIC.Core.IONProc.PartitionName[227].inputs = [CS006D35/RSP0] +PIC.Core.IONProc.PartitionName[228].inputs = [CS006D36/RSP0] +PIC.Core.IONProc.PartitionName[229].inputs = [CS006D37/RSP0] +PIC.Core.IONProc.PartitionName[230].inputs = [CS006D38/RSP0] +PIC.Core.IONProc.PartitionName[231].inputs = [CS006D39/RSP0] +PIC.Core.IONProc.PartitionName[232].inputs = [CS006D40/RSP0] +PIC.Core.IONProc.PartitionName[233].inputs = [CS006D41/RSP0] +PIC.Core.IONProc.PartitionName[234].inputs = [CS006D42/RSP0] +PIC.Core.IONProc.PartitionName[235].inputs = [CS006D43/RSP0] +PIC.Core.IONProc.PartitionName[236].inputs = [CS006D44/RSP0] +PIC.Core.IONProc.PartitionName[237].inputs = [CS006D45/RSP0] +PIC.Core.IONProc.PartitionName[238].inputs = [CS006D46/RSP0] +PIC.Core.IONProc.PartitionName[239].inputs = [CS006D47/RSP0] 
+PIC.Core.IONProc.PartitionName[240].inputs = [CS007D00/RSP0] +PIC.Core.IONProc.PartitionName[241].inputs = [CS007D01/RSP0] +PIC.Core.IONProc.PartitionName[242].inputs = [CS007D02/RSP0] +PIC.Core.IONProc.PartitionName[243].inputs = [CS007D03/RSP0] +PIC.Core.IONProc.PartitionName[244].inputs = [CS007D04/RSP0] +PIC.Core.IONProc.PartitionName[245].inputs = [CS007D05/RSP0] +PIC.Core.IONProc.PartitionName[246].inputs = [CS007D06/RSP0] +PIC.Core.IONProc.PartitionName[247].inputs = [CS007D07/RSP0] +PIC.Core.IONProc.PartitionName[248].inputs = [CS007D08/RSP0] +PIC.Core.IONProc.PartitionName[249].inputs = [CS007D09/RSP0] +PIC.Core.IONProc.PartitionName[250].inputs = [CS007D10/RSP0] +PIC.Core.IONProc.PartitionName[251].inputs = [CS007D11/RSP0] +PIC.Core.IONProc.PartitionName[252].inputs = [CS007D12/RSP0] +PIC.Core.IONProc.PartitionName[253].inputs = [CS007D13/RSP0] +PIC.Core.IONProc.PartitionName[254].inputs = [CS007D14/RSP0] +PIC.Core.IONProc.PartitionName[255].inputs = [CS007D15/RSP0] +PIC.Core.IONProc.PartitionName[256].inputs = [CS007D16/RSP0] +PIC.Core.IONProc.PartitionName[257].inputs = [CS007D17/RSP0] +PIC.Core.IONProc.PartitionName[258].inputs = [CS007D18/RSP0] +PIC.Core.IONProc.PartitionName[259].inputs = [CS007D19/RSP0] +PIC.Core.IONProc.PartitionName[260].inputs = [CS007D20/RSP0] +PIC.Core.IONProc.PartitionName[261].inputs = [CS007D21/RSP0] +PIC.Core.IONProc.PartitionName[262].inputs = [CS007D22/RSP0] +PIC.Core.IONProc.PartitionName[263].inputs = [CS007D23/RSP0] +PIC.Core.IONProc.PartitionName[264].inputs = [CS007D24/RSP0] +PIC.Core.IONProc.PartitionName[265].inputs = [CS007D25/RSP0] +PIC.Core.IONProc.PartitionName[266].inputs = [CS007D26/RSP0] +PIC.Core.IONProc.PartitionName[267].inputs = [CS007D27/RSP0] +PIC.Core.IONProc.PartitionName[268].inputs = [CS007D28/RSP0] +PIC.Core.IONProc.PartitionName[269].inputs = [CS007D29/RSP0] +PIC.Core.IONProc.PartitionName[270].inputs = [CS007D30/RSP0] +PIC.Core.IONProc.PartitionName[271].inputs = [CS007D31/RSP0] 
+PIC.Core.IONProc.PartitionName[272].inputs = [CS007D32/RSP0] +PIC.Core.IONProc.PartitionName[273].inputs = [CS007D33/RSP0] +PIC.Core.IONProc.PartitionName[274].inputs = [CS007D34/RSP0] +PIC.Core.IONProc.PartitionName[275].inputs = [CS007D35/RSP0] +PIC.Core.IONProc.PartitionName[276].inputs = [CS007D36/RSP0] +PIC.Core.IONProc.PartitionName[277].inputs = [CS007D37/RSP0] +PIC.Core.IONProc.PartitionName[278].inputs = [CS007D38/RSP0] +PIC.Core.IONProc.PartitionName[279].inputs = [CS007D39/RSP0] +PIC.Core.IONProc.PartitionName[280].inputs = [CS007D40/RSP0] +PIC.Core.IONProc.PartitionName[281].inputs = [CS007D41/RSP0] +PIC.Core.IONProc.PartitionName[282].inputs = [CS007D42/RSP0] +PIC.Core.IONProc.PartitionName[283].inputs = [CS007D43/RSP0] +PIC.Core.IONProc.PartitionName[284].inputs = [CS007D44/RSP0] +PIC.Core.IONProc.PartitionName[285].inputs = [CS007D45/RSP0] +PIC.Core.IONProc.PartitionName[286].inputs = [CS007D46/RSP0] +PIC.Core.IONProc.PartitionName[287].inputs = [CS007D47/RSP0] +PIC.Core.Station.CS002D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D00.raw] +PIC.Core.Station.CS002D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D01.raw] +PIC.Core.Station.CS002D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D02.raw] +PIC.Core.Station.CS002D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D03.raw] +PIC.Core.Station.CS002D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D04.raw] +PIC.Core.Station.CS002D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D05.raw] +PIC.Core.Station.CS002D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D06.raw] +PIC.Core.Station.CS002D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D07.raw] +PIC.Core.Station.CS002D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D08.raw] +PIC.Core.Station.CS002D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D09.raw] +PIC.Core.Station.CS002D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D10.raw] +PIC.Core.Station.CS002D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D11.raw] 
+PIC.Core.Station.CS002D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D12.raw] +PIC.Core.Station.CS002D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D13.raw] +PIC.Core.Station.CS002D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D14.raw] +PIC.Core.Station.CS002D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D15.raw] +PIC.Core.Station.CS002D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D16.raw] +PIC.Core.Station.CS002D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D17.raw] +PIC.Core.Station.CS002D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D18.raw] +PIC.Core.Station.CS002D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D19.raw] +PIC.Core.Station.CS002D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D20.raw] +PIC.Core.Station.CS002D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D21.raw] +PIC.Core.Station.CS002D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D22.raw] +PIC.Core.Station.CS002D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D23.raw] +PIC.Core.Station.CS002D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D24.raw] +PIC.Core.Station.CS002D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D25.raw] +PIC.Core.Station.CS002D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D26.raw] +PIC.Core.Station.CS002D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D27.raw] +PIC.Core.Station.CS002D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D28.raw] +PIC.Core.Station.CS002D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D29.raw] +PIC.Core.Station.CS002D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D30.raw] +PIC.Core.Station.CS002D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D31.raw] +PIC.Core.Station.CS002D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D32.raw] +PIC.Core.Station.CS002D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D33.raw] +PIC.Core.Station.CS002D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D34.raw] +PIC.Core.Station.CS002D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D35.raw] 
+PIC.Core.Station.CS002D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D36.raw] +PIC.Core.Station.CS002D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D37.raw] +PIC.Core.Station.CS002D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D38.raw] +PIC.Core.Station.CS002D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D39.raw] +PIC.Core.Station.CS002D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D40.raw] +PIC.Core.Station.CS002D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D41.raw] +PIC.Core.Station.CS002D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D42.raw] +PIC.Core.Station.CS002D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D43.raw] +PIC.Core.Station.CS002D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D44.raw] +PIC.Core.Station.CS002D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D45.raw] +PIC.Core.Station.CS002D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D46.raw] +PIC.Core.Station.CS002D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS002D47.raw] +PIC.Core.Station.CS003D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D00.raw] +PIC.Core.Station.CS003D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D01.raw] +PIC.Core.Station.CS003D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D02.raw] +PIC.Core.Station.CS003D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D03.raw] +PIC.Core.Station.CS003D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D04.raw] +PIC.Core.Station.CS003D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D05.raw] +PIC.Core.Station.CS003D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D06.raw] +PIC.Core.Station.CS003D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D07.raw] +PIC.Core.Station.CS003D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D08.raw] +PIC.Core.Station.CS003D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D09.raw] +PIC.Core.Station.CS003D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D10.raw] +PIC.Core.Station.CS003D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D11.raw] 
+PIC.Core.Station.CS003D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D12.raw] +PIC.Core.Station.CS003D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D13.raw] +PIC.Core.Station.CS003D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D14.raw] +PIC.Core.Station.CS003D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D15.raw] +PIC.Core.Station.CS003D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D16.raw] +PIC.Core.Station.CS003D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D17.raw] +PIC.Core.Station.CS003D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D18.raw] +PIC.Core.Station.CS003D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D19.raw] +PIC.Core.Station.CS003D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D20.raw] +PIC.Core.Station.CS003D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D21.raw] +PIC.Core.Station.CS003D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D22.raw] +PIC.Core.Station.CS003D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D23.raw] +PIC.Core.Station.CS003D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D24.raw] +PIC.Core.Station.CS003D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D25.raw] +PIC.Core.Station.CS003D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D26.raw] +PIC.Core.Station.CS003D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D27.raw] +PIC.Core.Station.CS003D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D28.raw] +PIC.Core.Station.CS003D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D29.raw] +PIC.Core.Station.CS003D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D30.raw] +PIC.Core.Station.CS003D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D31.raw] +PIC.Core.Station.CS003D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D32.raw] +PIC.Core.Station.CS003D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D33.raw] +PIC.Core.Station.CS003D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D34.raw] +PIC.Core.Station.CS003D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D35.raw] 
+PIC.Core.Station.CS003D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D36.raw] +PIC.Core.Station.CS003D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D37.raw] +PIC.Core.Station.CS003D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D38.raw] +PIC.Core.Station.CS003D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D39.raw] +PIC.Core.Station.CS003D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D40.raw] +PIC.Core.Station.CS003D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D41.raw] +PIC.Core.Station.CS003D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D42.raw] +PIC.Core.Station.CS003D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D43.raw] +PIC.Core.Station.CS003D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D44.raw] +PIC.Core.Station.CS003D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D45.raw] +PIC.Core.Station.CS003D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D46.raw] +PIC.Core.Station.CS003D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS003D47.raw] +PIC.Core.Station.CS004D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D00.raw] +PIC.Core.Station.CS004D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D01.raw] +PIC.Core.Station.CS004D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D02.raw] +PIC.Core.Station.CS004D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D03.raw] +PIC.Core.Station.CS004D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D04.raw] +PIC.Core.Station.CS004D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D05.raw] +PIC.Core.Station.CS004D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D06.raw] +PIC.Core.Station.CS004D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D07.raw] +PIC.Core.Station.CS004D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D08.raw] +PIC.Core.Station.CS004D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D09.raw] +PIC.Core.Station.CS004D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D10.raw] +PIC.Core.Station.CS004D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D11.raw] 
+PIC.Core.Station.CS004D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D12.raw] +PIC.Core.Station.CS004D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D13.raw] +PIC.Core.Station.CS004D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D14.raw] +PIC.Core.Station.CS004D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D15.raw] +PIC.Core.Station.CS004D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D16.raw] +PIC.Core.Station.CS004D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D17.raw] +PIC.Core.Station.CS004D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D18.raw] +PIC.Core.Station.CS004D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D19.raw] +PIC.Core.Station.CS004D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D20.raw] +PIC.Core.Station.CS004D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D21.raw] +PIC.Core.Station.CS004D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D22.raw] +PIC.Core.Station.CS004D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D23.raw] +PIC.Core.Station.CS004D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D24.raw] +PIC.Core.Station.CS004D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D25.raw] +PIC.Core.Station.CS004D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D26.raw] +PIC.Core.Station.CS004D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D27.raw] +PIC.Core.Station.CS004D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D28.raw] +PIC.Core.Station.CS004D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D29.raw] +PIC.Core.Station.CS004D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D30.raw] +PIC.Core.Station.CS004D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D31.raw] +PIC.Core.Station.CS004D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D32.raw] +PIC.Core.Station.CS004D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D33.raw] +PIC.Core.Station.CS004D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D34.raw] +PIC.Core.Station.CS004D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D35.raw] 
+PIC.Core.Station.CS004D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D36.raw] +PIC.Core.Station.CS004D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D37.raw] +PIC.Core.Station.CS004D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D38.raw] +PIC.Core.Station.CS004D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D39.raw] +PIC.Core.Station.CS004D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D40.raw] +PIC.Core.Station.CS004D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D41.raw] +PIC.Core.Station.CS004D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D42.raw] +PIC.Core.Station.CS004D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D43.raw] +PIC.Core.Station.CS004D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D44.raw] +PIC.Core.Station.CS004D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D45.raw] +PIC.Core.Station.CS004D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D46.raw] +PIC.Core.Station.CS004D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS004D47.raw] +PIC.Core.Station.CS005D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D00.raw] +PIC.Core.Station.CS005D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D01.raw] +PIC.Core.Station.CS005D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D02.raw] +PIC.Core.Station.CS005D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D03.raw] +PIC.Core.Station.CS005D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D04.raw] +PIC.Core.Station.CS005D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D05.raw] +PIC.Core.Station.CS005D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D06.raw] +PIC.Core.Station.CS005D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D07.raw] +PIC.Core.Station.CS005D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D08.raw] +PIC.Core.Station.CS005D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D09.raw] +PIC.Core.Station.CS005D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D10.raw] +PIC.Core.Station.CS005D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D11.raw] 
+PIC.Core.Station.CS005D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D12.raw] +PIC.Core.Station.CS005D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D13.raw] +PIC.Core.Station.CS005D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D14.raw] +PIC.Core.Station.CS005D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D15.raw] +PIC.Core.Station.CS005D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D16.raw] +PIC.Core.Station.CS005D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D17.raw] +PIC.Core.Station.CS005D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D18.raw] +PIC.Core.Station.CS005D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D19.raw] +PIC.Core.Station.CS005D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D20.raw] +PIC.Core.Station.CS005D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D21.raw] +PIC.Core.Station.CS005D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D22.raw] +PIC.Core.Station.CS005D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D23.raw] +PIC.Core.Station.CS005D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D24.raw] +PIC.Core.Station.CS005D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D25.raw] +PIC.Core.Station.CS005D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D26.raw] +PIC.Core.Station.CS005D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D27.raw] +PIC.Core.Station.CS005D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D28.raw] +PIC.Core.Station.CS005D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D29.raw] +PIC.Core.Station.CS005D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D30.raw] +PIC.Core.Station.CS005D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D31.raw] +PIC.Core.Station.CS005D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D32.raw] +PIC.Core.Station.CS005D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D33.raw] +PIC.Core.Station.CS005D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D34.raw] +PIC.Core.Station.CS005D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D35.raw] 
+PIC.Core.Station.CS005D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D36.raw] +PIC.Core.Station.CS005D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D37.raw] +PIC.Core.Station.CS005D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D38.raw] +PIC.Core.Station.CS005D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D39.raw] +PIC.Core.Station.CS005D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D40.raw] +PIC.Core.Station.CS005D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D41.raw] +PIC.Core.Station.CS005D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D42.raw] +PIC.Core.Station.CS005D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D43.raw] +PIC.Core.Station.CS005D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D44.raw] +PIC.Core.Station.CS005D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D45.raw] +PIC.Core.Station.CS005D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D46.raw] +PIC.Core.Station.CS005D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS005D47.raw] +PIC.Core.Station.CS006D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D00.raw] +PIC.Core.Station.CS006D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D01.raw] +PIC.Core.Station.CS006D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D02.raw] +PIC.Core.Station.CS006D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D03.raw] +PIC.Core.Station.CS006D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D04.raw] +PIC.Core.Station.CS006D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D05.raw] +PIC.Core.Station.CS006D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D06.raw] +PIC.Core.Station.CS006D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D07.raw] +PIC.Core.Station.CS006D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D08.raw] +PIC.Core.Station.CS006D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D09.raw] +PIC.Core.Station.CS006D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D10.raw] +PIC.Core.Station.CS006D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D11.raw] 
+PIC.Core.Station.CS006D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D12.raw] +PIC.Core.Station.CS006D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D13.raw] +PIC.Core.Station.CS006D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D14.raw] +PIC.Core.Station.CS006D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D15.raw] +PIC.Core.Station.CS006D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D16.raw] +PIC.Core.Station.CS006D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D17.raw] +PIC.Core.Station.CS006D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D18.raw] +PIC.Core.Station.CS006D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D19.raw] +PIC.Core.Station.CS006D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D20.raw] +PIC.Core.Station.CS006D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D21.raw] +PIC.Core.Station.CS006D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D22.raw] +PIC.Core.Station.CS006D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D23.raw] +PIC.Core.Station.CS006D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D24.raw] +PIC.Core.Station.CS006D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D25.raw] +PIC.Core.Station.CS006D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D26.raw] +PIC.Core.Station.CS006D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D27.raw] +PIC.Core.Station.CS006D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D28.raw] +PIC.Core.Station.CS006D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D29.raw] +PIC.Core.Station.CS006D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D30.raw] +PIC.Core.Station.CS006D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D31.raw] +PIC.Core.Station.CS006D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D32.raw] +PIC.Core.Station.CS006D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D33.raw] +PIC.Core.Station.CS006D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D34.raw] +PIC.Core.Station.CS006D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D35.raw] 
+PIC.Core.Station.CS006D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D36.raw] +PIC.Core.Station.CS006D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D37.raw] +PIC.Core.Station.CS006D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D38.raw] +PIC.Core.Station.CS006D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D39.raw] +PIC.Core.Station.CS006D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D40.raw] +PIC.Core.Station.CS006D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D41.raw] +PIC.Core.Station.CS006D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D42.raw] +PIC.Core.Station.CS006D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D43.raw] +PIC.Core.Station.CS006D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D44.raw] +PIC.Core.Station.CS006D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D45.raw] +PIC.Core.Station.CS006D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D46.raw] +PIC.Core.Station.CS006D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS006D47.raw] +PIC.Core.Station.CS007D00.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D00.raw] +PIC.Core.Station.CS007D01.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D01.raw] +PIC.Core.Station.CS007D02.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D02.raw] +PIC.Core.Station.CS007D03.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D03.raw] +PIC.Core.Station.CS007D04.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D04.raw] +PIC.Core.Station.CS007D05.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D05.raw] +PIC.Core.Station.CS007D06.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D06.raw] +PIC.Core.Station.CS007D07.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D07.raw] +PIC.Core.Station.CS007D08.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D08.raw] +PIC.Core.Station.CS007D09.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D09.raw] +PIC.Core.Station.CS007D10.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D10.raw] +PIC.Core.Station.CS007D11.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D11.raw] 
+PIC.Core.Station.CS007D12.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D12.raw] +PIC.Core.Station.CS007D13.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D13.raw] +PIC.Core.Station.CS007D14.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D14.raw] +PIC.Core.Station.CS007D15.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D15.raw] +PIC.Core.Station.CS007D16.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D16.raw] +PIC.Core.Station.CS007D17.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D17.raw] +PIC.Core.Station.CS007D18.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D18.raw] +PIC.Core.Station.CS007D19.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D19.raw] +PIC.Core.Station.CS007D20.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D20.raw] +PIC.Core.Station.CS007D21.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D21.raw] +PIC.Core.Station.CS007D22.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D22.raw] +PIC.Core.Station.CS007D23.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D23.raw] +PIC.Core.Station.CS007D24.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D24.raw] +PIC.Core.Station.CS007D25.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D25.raw] +PIC.Core.Station.CS007D26.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D26.raw] +PIC.Core.Station.CS007D27.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D27.raw] +PIC.Core.Station.CS007D28.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D28.raw] +PIC.Core.Station.CS007D29.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D29.raw] +PIC.Core.Station.CS007D30.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D30.raw] +PIC.Core.Station.CS007D31.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D31.raw] +PIC.Core.Station.CS007D32.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D32.raw] +PIC.Core.Station.CS007D33.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D33.raw] +PIC.Core.Station.CS007D34.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D34.raw] +PIC.Core.Station.CS007D35.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D35.raw] 
+PIC.Core.Station.CS007D36.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D36.raw] +PIC.Core.Station.CS007D37.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D37.raw] +PIC.Core.Station.CS007D38.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D38.raw] +PIC.Core.Station.CS007D39.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D39.raw] +PIC.Core.Station.CS007D40.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D40.raw] +PIC.Core.Station.CS007D41.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D41.raw] +PIC.Core.Station.CS007D42.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D42.raw] +PIC.Core.Station.CS007D43.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D43.raw] +PIC.Core.Station.CS007D44.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D44.raw] +PIC.Core.Station.CS007D45.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D45.raw] +PIC.Core.Station.CS007D46.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D46.raw] +PIC.Core.Station.CS007D47.RSP.ports = [file:/local/AARTFAAC-dataset/CS007D47.raw] +PIC.Core.CS002D00.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D01.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D02.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D03.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D04.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D05.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D06.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D07.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D08.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D09.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D10.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D11.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D12.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D13.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D14.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D15.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D16.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D17.clockCorrectionTime = 8.291395e-06 
+PIC.Core.CS002D18.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D19.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D20.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D21.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D22.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D23.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D24.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D25.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D26.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D27.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D28.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D29.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D30.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D31.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D32.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D33.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D34.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D35.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D36.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D37.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D38.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D39.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D40.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D41.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D42.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D43.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D44.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D45.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D46.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS002D47.clockCorrectionTime = 8.291395e-06 +PIC.Core.CS003D00.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D01.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D02.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D03.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D04.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D05.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D06.clockCorrectionTime = 6.889990e-06 
+PIC.Core.CS003D07.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D08.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D09.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D10.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D11.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D12.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D13.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D14.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D15.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D16.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D17.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D18.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D19.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D20.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D21.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D22.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D23.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D24.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D25.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D26.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D27.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D28.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D29.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D30.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D31.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D32.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D33.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D34.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D35.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D36.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D37.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D38.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D39.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D40.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D41.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D42.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D43.clockCorrectionTime = 6.889990e-06 
+PIC.Core.CS003D44.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D45.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D46.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS003D47.clockCorrectionTime = 6.889990e-06 +PIC.Core.CS004D00.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D01.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D02.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D03.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D04.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D05.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D06.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D07.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D08.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D09.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D10.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D11.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D12.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D13.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D14.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D15.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D16.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D17.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D18.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D19.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D20.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D21.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D22.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D23.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D24.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D25.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D26.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D27.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D28.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D29.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D30.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D31.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D32.clockCorrectionTime = 7.861759e-06 
+PIC.Core.CS004D33.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D34.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D35.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D36.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D37.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D38.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D39.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D40.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D41.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D42.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D43.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D44.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D45.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D46.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS004D47.clockCorrectionTime = 7.861759e-06 +PIC.Core.CS005D00.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D01.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D02.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D03.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D04.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D05.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D06.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D07.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D08.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D09.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D10.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D11.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D12.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D13.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D14.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D15.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D16.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D17.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D18.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D19.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D20.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D21.clockCorrectionTime = 8.513690e-06 
+PIC.Core.CS005D22.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D23.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D24.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D25.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D26.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D27.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D28.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D29.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D30.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D31.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D32.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D33.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D34.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D35.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D36.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D37.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D38.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D39.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D40.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D41.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D42.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D43.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D44.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D45.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D46.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS005D47.clockCorrectionTime = 8.513690e-06 +PIC.Core.CS006D00.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D01.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D02.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D03.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D04.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D05.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D06.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D07.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D08.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D09.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D10.clockCorrectionTime = 7.854904e-06 
+PIC.Core.CS006D11.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D12.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D13.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D14.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D15.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D16.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D17.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D18.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D19.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D20.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D21.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D22.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D23.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D24.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D25.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D26.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D27.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D28.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D29.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D30.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D31.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D32.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D33.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D34.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D35.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D36.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D37.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D38.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D39.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D40.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D41.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D42.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D43.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D44.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D45.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D46.clockCorrectionTime = 7.854904e-06 +PIC.Core.CS006D47.clockCorrectionTime = 7.854904e-06 
+PIC.Core.CS007D00.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D01.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D02.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D03.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D04.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D05.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D06.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D07.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D08.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D09.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D10.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D11.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D12.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D13.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D14.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D15.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D16.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D17.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D18.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D19.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D20.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D21.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D22.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D23.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D24.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D25.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D26.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D27.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D28.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D29.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D30.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D31.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D32.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D33.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D34.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D35.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D36.clockCorrectionTime = 7.885136e-06 
+PIC.Core.CS007D37.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D38.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D39.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D40.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D41.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D42.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D43.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D44.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D45.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D46.clockCorrectionTime = 7.885136e-06 +PIC.Core.CS007D47.clockCorrectionTime = 7.885136e-06 +Observation.referencePhaseCenter = [3826577.110,461022.900,5064892.758] +# LBA_INNER +#PIC.Core.CS002D00.phaseCenter = [3826577.109500,461022.900196,5064892.758000] +#PIC.Core.CS002D01.phaseCenter = [3826575.090500,461022.657196,5064894.296000] +#PIC.Core.CS002D02.phaseCenter = [3826575.771500,461025.005196,5064893.572000] +#PIC.Core.CS002D03.phaseCenter = [3826577.909500,461025.263196,5064891.944000] +#PIC.Core.CS002D04.phaseCenter = [3826579.128500,461023.143196,5064891.220000] +#PIC.Core.CS002D05.phaseCenter = [3826578.447500,461020.795196,5064891.944000] +#PIC.Core.CS002D06.phaseCenter = [3826576.309500,461020.537196,5064893.572000] +#PIC.Core.CS002D07.phaseCenter = [3826574.036500,461018.775195,5064895.437000] +#PIC.Core.CS002D08.phaseCenter = [3826572.516500,461022.347195,5064896.256000] +#PIC.Core.CS002D09.phaseCenter = [3826573.144500,461026.177195,5064895.438000] +#PIC.Core.CS002D10.phaseCenter = [3826575.628500,461028.475196,5064893.365000] +#PIC.Core.CS002D11.phaseCenter = [3826578.805500,461028.164196,5064891.009000] +#PIC.Core.CS002D12.phaseCenter = [3826581.188500,461025.390196,5064889.471000] +#PIC.Core.CS002D13.phaseCenter = [3826581.662500,461021.450196,5064889.471000] +#PIC.Core.CS002D14.phaseCenter = [3826580.006500,461018.190196,5064891.009000] +#PIC.Core.CS002D15.phaseCenter = [3826576.995500,461017.133196,5064893.365000] +#PIC.Core.CS002D16.phaseCenter = 
[3826576.272500,461013.726196,5064894.215000] +#PIC.Core.CS002D17.phaseCenter = [3826573.376500,461014.469195,5064896.322000] +#PIC.Core.CS002D18.phaseCenter = [3826570.181500,461017.749195,5064898.424000] +#PIC.Core.CS002D19.phaseCenter = [3826569.112500,461024.945195,5064898.576000] +#PIC.Core.CS002D20.phaseCenter = [3826569.852500,461027.976195,5064897.746000] +#PIC.Core.CS002D21.phaseCenter = [3826572.070500,461031.095195,5064895.800000] +#PIC.Core.CS002D22.phaseCenter = [3826575.368500,461031.959196,5064893.246000] +#PIC.Core.CS002D23.phaseCenter = [3826579.971500,461030.917196,5064889.885000] +#PIC.Core.CS002D24.phaseCenter = [3826583.385500,461028.524196,5064887.539000] +#PIC.Core.CS002D25.phaseCenter = [3826584.999500,461024.537196,5064886.688000] +#PIC.Core.CS002D26.phaseCenter = [3826584.442500,461020.707196,5064887.452000] +#PIC.Core.CS002D27.phaseCenter = [3826582.564500,461016.605196,5064889.232000] +#PIC.Core.CS002D28.phaseCenter = [3826579.539500,461014.081196,5064891.731000] +#PIC.Core.CS002D29.phaseCenter = [3826574.724500,461009.083195,5064895.797000] +#PIC.Core.CS002D30.phaseCenter = [3826571.744500,461010.132195,5064897.939000] +#PIC.Core.CS002D31.phaseCenter = [3826569.960500,461012.435195,5064899.070000] +#PIC.Core.CS002D32.phaseCenter = [3826567.013500,461019.699195,5064900.626000] +#PIC.Core.CS002D33.phaseCenter = [3826565.109500,461021.640195,5064901.880000] +#PIC.Core.CS002D34.phaseCenter = [3826567.204500,461028.490195,5064899.688000] +#PIC.Core.CS002D35.phaseCenter = [3826569.542500,461032.470195,5064897.573000] +#PIC.Core.CS002D36.phaseCenter = [3826571.812500,461036.793195,5064895.478000] +#PIC.Core.CS002D37.phaseCenter = [3826574.471500,461036.116195,5064893.544000] +#PIC.Core.CS002D38.phaseCenter = [3826578.276500,461035.734196,5064890.722000] +#PIC.Core.CS002D39.phaseCenter = [3826583.531500,461032.936196,5064887.030000] +#PIC.Core.CS002D40.phaseCenter = [3826585.419500,461031.329196,5064885.758000] +#PIC.Core.CS002D41.phaseCenter = 
[3826587.195500,461026.381196,5064884.873000] +#PIC.Core.CS002D42.phaseCenter = [3826589.188500,461022.935197,5064883.688000] +#PIC.Core.CS002D43.phaseCenter = [3826586.727500,461014.817196,5064886.269000] +#PIC.Core.CS002D44.phaseCenter = [3826584.542500,461012.072196,5064888.157000] +#PIC.Core.CS002D45.phaseCenter = [3826581.014500,461009.055196,5064891.078000] +#PIC.Core.CS002D46.phaseCenter = [3826603.455510,460977.274198,5064877.107000] +#PIC.Core.CS002D47.phaseCenter = [3826612.589500,461053.051198,5064863.401000] +##PIC.Core.CS003D00.phaseCenter = [3826516.791510,460930.018191,5064946.429000] +#PIC.Core.CS003D01.phaseCenter = [3826514.771510,460929.774191,5064947.967000] +#PIC.Core.CS003D02.phaseCenter = [3826515.452500,460932.123191,5064947.243000] +#PIC.Core.CS003D03.phaseCenter = [3826517.591510,460932.380191,5064945.615000] +#PIC.Core.CS003D04.phaseCenter = [3826518.810510,460930.261191,5064944.891000] +#PIC.Core.CS003D05.phaseCenter = [3826518.129510,460927.913191,5064945.615000] +#PIC.Core.CS003D06.phaseCenter = [3826515.991510,460927.655191,5064947.243000] +#PIC.Core.CS003D07.phaseCenter = [3826521.344510,460928.568192,5064943.142000] +#PIC.Core.CS003D08.phaseCenter = [3826519.688510,460925.308191,5064944.680000] +#PIC.Core.CS003D09.phaseCenter = [3826516.676510,460924.251191,5064947.036000] +#PIC.Core.CS003D10.phaseCenter = [3826513.718510,460925.893191,5064949.109000] +#PIC.Core.CS003D11.phaseCenter = [3826512.197510,460929.464191,5064949.927000] +#PIC.Core.CS003D12.phaseCenter = [3826512.826500,460933.295191,5064949.109000] +#PIC.Core.CS003D13.phaseCenter = [3826515.310500,460935.593191,5064947.037000] +#PIC.Core.CS003D14.phaseCenter = [3826518.487500,460935.281191,5064944.680000] +#PIC.Core.CS003D15.phaseCenter = [3826520.870510,460932.508192,5064943.142000] +#PIC.Core.CS003D16.phaseCenter = [3826523.638510,460933.271192,5064940.995000] +#PIC.Core.CS003D17.phaseCenter = [3826524.704510,460929.807192,5064940.509000] +#PIC.Core.CS003D18.phaseCenter 
= [3826524.166510,460924.869192,5064941.358000] +#PIC.Core.CS003D19.phaseCenter = [3826519.739510,460920.454191,5064945.080000] +#PIC.Core.CS003D20.phaseCenter = [3826517.224510,460919.880191,5064947.020000] +#PIC.Core.CS003D21.phaseCenter = [3826513.839510,460920.862191,5064949.472000] +#PIC.Core.CS003D22.phaseCenter = [3826511.434510,460924.032191,5064950.991000] +#PIC.Core.CS003D23.phaseCenter = [3826509.643500,460929.476191,5064951.844000] +#PIC.Core.CS003D24.phaseCenter = [3826509.441500,460934.248191,5064951.563000] +#PIC.Core.CS003D25.phaseCenter = [3826511.336500,460937.797191,5064949.821000] +#PIC.Core.CS003D26.phaseCenter = [3826514.308500,460938.931191,5064947.487000] +#PIC.Core.CS003D27.phaseCenter = [3826518.191500,460938.761191,5064944.588000] +#PIC.Core.CS003D28.phaseCenter = [3826521.604500,460936.638192,5064942.218000] +#PIC.Core.CS003D29.phaseCenter = [3826527.718510,460933.703192,5064937.894000] +#PIC.Core.CS003D30.phaseCenter = [3826528.618510,460930.008192,5064937.552000] +#PIC.Core.CS003D31.phaseCenter = [3826527.989510,460927.039192,5064938.293000] +#PIC.Core.CS003D32.phaseCenter = [3826524.541510,460920.564192,5064941.466000] +#PIC.Core.CS003D33.phaseCenter = [3826524.230510,460917.628192,5064941.965000] +#PIC.Core.CS003D34.phaseCenter = [3826518.314510,460916.787191,5064946.482000] +#PIC.Core.CS003D35.phaseCenter = [3826514.264510,460917.509191,5064949.456000] +#PIC.Core.CS003D36.phaseCenter = [3826510.012510,460918.002191,5064952.604000] +#PIC.Core.CS003D37.phaseCenter = [3826509.029510,460921.180191,5064953.054000] +#PIC.Core.CS003D38.phaseCenter = [3826507.214510,460925.463191,5064954.029000] +#PIC.Core.CS003D39.phaseCenter = [3826506.289500,460932.406191,5064954.096000] +#PIC.Core.CS003D40.phaseCenter = [3826506.376500,460935.174191,5064953.780000] +#PIC.Core.CS003D41.phaseCenter = [3826508.850500,460939.333191,5064951.547000] +#PIC.Core.CS003D42.phaseCenter = [3826510.161500,460943.047191,5064950.228000] +#PIC.Core.CS003D43.phaseCenter 
= [3826517.161500,460944.068191,5064944.882000] +#PIC.Core.CS003D44.phaseCenter = [3826520.267500,460942.952192,5064942.651000] +#PIC.Core.CS003D45.phaseCenter = [3826524.297500,460940.509192,5064939.846000] +#PIC.Core.CS003D46.phaseCenter = [3826478.141510,460906.374188,5064977.577990] +#PIC.Core.CS003D47.phaseCenter = [3826495.639500,460979.543190,5064957.829990] +#PIC.Core.CS004D00.phaseCenter = [3826654.240510,460939.528201,5064842.397000] +#PIC.Core.CS004D01.phaseCenter = [3826652.220510,460939.284201,5064843.935000] +#PIC.Core.CS004D02.phaseCenter = [3826652.902510,460941.633201,5064843.211000] +#PIC.Core.CS004D03.phaseCenter = [3826655.040510,460941.890201,5064841.583000] +#PIC.Core.CS004D04.phaseCenter = [3826656.259510,460939.771202,5064840.859000] +#PIC.Core.CS004D05.phaseCenter = [3826655.578510,460937.423201,5064841.583000] +#PIC.Core.CS004D06.phaseCenter = [3826653.440510,460937.165201,5064843.211000] +#PIC.Core.CS004D07.phaseCenter = [3826652.759510,460945.103201,5064843.004000] +#PIC.Core.CS004D08.phaseCenter = [3826655.936510,460944.791202,5064840.648000] +#PIC.Core.CS004D09.phaseCenter = [3826658.319510,460942.018202,5064839.110000] +#PIC.Core.CS004D10.phaseCenter = [3826658.793510,460938.078202,5064839.110000] +#PIC.Core.CS004D11.phaseCenter = [3826657.137510,460934.818202,5064840.648000] +#PIC.Core.CS004D12.phaseCenter = [3826654.126510,460933.761201,5064843.004000] +#PIC.Core.CS004D13.phaseCenter = [3826651.167510,460935.403201,5064845.076000] +#PIC.Core.CS004D14.phaseCenter = [3826649.647510,460938.974201,5064845.895000] +#PIC.Core.CS004D15.phaseCenter = [3826650.275510,460942.805201,5064845.077000] +#PIC.Core.CS004D16.phaseCenter = [3826648.230510,460945.448201,5064846.373000] +#PIC.Core.CS004D17.phaseCenter = [3826650.060510,460948.170201,5064844.753000] +#PIC.Core.CS004D18.phaseCenter = [3826653.792510,460949.826201,5064841.802000] +#PIC.Core.CS004D19.phaseCenter = [3826659.289510,460947.047202,5064837.928000] +#PIC.Core.CS004D20.phaseCenter 
= [3826661.064510,460944.589202,5064836.817000] +#PIC.Core.CS004D21.phaseCenter = [3826662.230510,460940.488202,5064836.312000] +#PIC.Core.CS004D22.phaseCenter = [3826661.338510,460936.455202,5064837.347000] +#PIC.Core.CS004D23.phaseCenter = [3826658.527510,460932.053202,5064839.855000] +#PIC.Core.CS004D24.phaseCenter = [3826655.313510,460929.673201,5064842.482000] +#PIC.Core.CS004D25.phaseCenter = [3826651.805510,460930.112201,5064845.075000] +#PIC.Core.CS004D26.phaseCenter = [3826649.390510,460932.807201,5064846.645000] +#PIC.Core.CS004D27.phaseCenter = [3826647.384510,460937.078201,5064847.764000] +#PIC.Core.CS004D28.phaseCenter = [3826646.997510,460941.726201,5064847.635000] +#PIC.Core.CS004D29.phaseCenter = [3826645.698510,460949.660201,5064847.893000] +#PIC.Core.CS004D30.phaseCenter = [3826647.779510,460952.306201,5064846.092000] +#PIC.Core.CS004D31.phaseCenter = [3826650.191510,460952.972201,5064844.221000] +#PIC.Core.CS004D32.phaseCenter = [3826656.586510,460952.182202,5064839.492000] +#PIC.Core.CS004D33.phaseCenter = [3826658.801510,460953.177202,5064837.740000] +#PIC.Core.CS004D34.phaseCenter = [3826662.622510,460947.168202,5064835.415000] +#PIC.Core.CS004D35.phaseCenter = [3826664.333510,460942.467202,5064834.555000] +#PIC.Core.CS004D36.phaseCenter = [3826666.316510,460937.651202,5064833.502000] +#PIC.Core.CS004D37.phaseCenter = [3826664.640510,460935.149202,5064834.986000] +#PIC.Core.CS004D38.phaseCenter = [3826662.649510,460931.249202,5064836.833000] +#PIC.Core.CS004D39.phaseCenter = [3826658.320510,460927.104202,5064840.458000] +#PIC.Core.CS004D40.phaseCenter = [3826656.344510,460925.943202,5064842.045000] +#PIC.Core.CS004D41.phaseCenter = [3826652.094510,460926.731201,5064845.164000] +#PIC.Core.CS004D42.phaseCenter = [3826648.791510,460926.463201,5064847.668000] +#PIC.Core.CS004D43.phaseCenter = [3826644.252510,460933.560201,5064850.433000] +#PIC.Core.CS004D44.phaseCenter = [3826643.331510,460937.422201,5064850.775000] +#PIC.Core.CS004D45.phaseCenter 
= [3826642.828510,460942.882201,5064850.660000] +#PIC.Core.CS004D46.phaseCenter = [3826664.882510,460885.708202,5064839.274000] +#PIC.Core.CS004D47.phaseCenter = [3826611.489510,460929.214198,5064875.419000] +#PIC.Core.CS005D00.phaseCenter = [3826668.793500,461069.502202,5064819.725000] +#PIC.Core.CS005D01.phaseCenter = [3826666.773500,461069.259202,5064821.263000] +#PIC.Core.CS005D02.phaseCenter = [3826667.455500,461071.607202,5064820.540000] +#PIC.Core.CS005D03.phaseCenter = [3826669.593500,461071.865202,5064818.911000] +#PIC.Core.CS005D04.phaseCenter = [3826670.812500,461069.745203,5064818.188000] +#PIC.Core.CS005D05.phaseCenter = [3826670.131500,461067.397202,5064818.911000] +#PIC.Core.CS005D06.phaseCenter = [3826667.993500,461067.139202,5064820.539000] +#PIC.Core.CS005D07.phaseCenter = [3826672.168500,461065.304203,5064817.572000] +#PIC.Core.CS005D08.phaseCenter = [3826669.326500,461063.728202,5064819.847000] +#PIC.Core.CS005D09.phaseCenter = [3826666.235500,461064.852202,5064822.066000] +#PIC.Core.CS005D10.phaseCenter = [3826664.341500,461068.153202,5064823.189000] +#PIC.Core.CS005D11.phaseCenter = [3826664.530500,461072.084202,5064822.692000] +#PIC.Core.CS005D12.phaseCenter = [3826666.714500,461074.807202,5064820.806000] +#PIC.Core.CS005D13.phaseCenter = [3826669.871500,461075.049202,5064818.415000] +#PIC.Core.CS005D14.phaseCenter = [3826672.523500,461072.694203,5064816.637000] +#PIC.Core.CS005D15.phaseCenter = [3826673.430500,461068.846203,5064816.304000] +#PIC.Core.CS005D16.phaseCenter = [3826676.182500,461067.642203,5064814.347000] +#PIC.Core.CS005D17.phaseCenter = [3826675.640500,461064.099203,5064815.074000] +#PIC.Core.CS005D18.phaseCenter = [3826673.065500,461060.409203,5064817.341000] +#PIC.Core.CS005D19.phaseCenter = [3826667.303500,461059.716202,5064821.728000] +#PIC.Core.CS005D20.phaseCenter = [3826664.852500,461060.909202,5064823.460000] +#PIC.Core.CS005D21.phaseCenter = [3826662.304500,461063.951202,5064825.098000] +#PIC.Core.CS005D22.phaseCenter 
= [3826661.545500,461068.138202,5064825.289000] +#PIC.Core.CS005D23.phaseCenter = [3826662.295500,461073.781202,5064824.216000] +#PIC.Core.CS005D24.phaseCenter = [3826664.152500,461077.825202,5064822.456000] +#PIC.Core.CS005D25.phaseCenter = [3826667.326500,461079.481202,5064819.925000] +#PIC.Core.CS005D26.phaseCenter = [3826670.414500,461078.446203,5064817.700000] +#PIC.Core.CS005D27.phaseCenter = [3826673.748500,461075.740203,5064815.443000] +#PIC.Core.CS005D28.phaseCenter = [3826675.834500,461071.745203,5064814.237000] +#PIC.Core.CS005D29.phaseCenter = [3826679.944500,461065.300203,5064811.735000] +#PIC.Core.CS005D30.phaseCenter = [3826679.158500,461061.679203,5064812.652000] +#PIC.Core.CS005D31.phaseCenter = [3826677.340500,461059.661203,5064814.199000] +#PIC.Core.CS005D32.phaseCenter = [3826671.560500,461056.633203,5064818.812000] +#PIC.Core.CS005D33.phaseCenter = [3826670.035500,461054.433202,5064820.155000] +#PIC.Core.CS005D34.phaseCenter = [3826664.490500,461057.654202,5064824.026000] +#PIC.Core.CS005D35.phaseCenter = [3826661.247500,461060.923202,5064826.165000] +#PIC.Core.CS005D36.phaseCenter = [3826657.728500,461064.137202,5064828.516000] +#PIC.Core.CS005D37.phaseCenter = [3826658.222500,461067.390202,5064827.851000] +#PIC.Core.CS005D38.phaseCenter = [3826658.456500,461072.098202,5064827.250000] +#PIC.Core.CS005D39.phaseCenter = [3826660.603500,461078.399202,5064825.068000] +#PIC.Core.CS005D40.phaseCenter = [3826661.860500,461080.609202,5064823.926000] +#PIC.Core.CS005D41.phaseCenter = [3826665.802500,461082.383202,5064820.807000] +#PIC.Core.CS005D42.phaseCenter = [3826668.533500,461084.560202,5064818.560000] +#PIC.Core.CS005D43.phaseCenter = [3826675.105500,461080.770203,5064813.969000] +#PIC.Core.CS005D44.phaseCenter = [3826677.353500,461077.802203,5064812.550000] +#PIC.Core.CS005D45.phaseCenter = [3826679.846500,461073.137203,5064811.100000] +#PIC.Core.CS005D46.phaseCenter = [3826687.931510,461018.878204,5064809.936000] +#PIC.Core.CS005D47.phaseCenter 
= [3826629.117500,461048.374199,5064851.417000] +#PIC.Core.CS006D00.phaseCenter = [3826596.773490,461145.130197,5064866.950000] +#PIC.Core.CS006D01.phaseCenter = [3826594.753490,461144.887197,5064868.488000] +#PIC.Core.CS006D02.phaseCenter = [3826595.435490,461147.235197,5064867.764000] +#PIC.Core.CS006D03.phaseCenter = [3826597.573490,461147.492197,5064866.136000] +#PIC.Core.CS006D04.phaseCenter = [3826598.792490,461145.373197,5064865.412000] +#PIC.Core.CS006D05.phaseCenter = [3826598.111490,461143.025197,5064866.136000] +#PIC.Core.CS006D06.phaseCenter = [3826595.973490,461142.767197,5064867.764000] +#PIC.Core.CS006D07.phaseCenter = [3826597.851490,461150.677197,5064865.640000] +#PIC.Core.CS006D08.phaseCenter = [3826600.503490,461148.322197,5064863.862000] +#PIC.Core.CS006D09.phaseCenter = [3826601.410490,461144.474197,5064863.529000] +#PIC.Core.CS006D10.phaseCenter = [3826600.147490,461140.933197,5064864.796000] +#PIC.Core.CS006D11.phaseCenter = [3826597.306490,461139.356197,5064867.072000] +#PIC.Core.CS006D12.phaseCenter = [3826594.215490,461140.480197,5064869.291000] +#PIC.Core.CS006D13.phaseCenter = [3826592.320490,461143.781197,5064870.414000] +#PIC.Core.CS006D14.phaseCenter = [3826592.510490,461147.711197,5064869.917000] +#PIC.Core.CS006D15.phaseCenter = [3826594.694490,461150.435197,5064868.031000] +#PIC.Core.CS006D16.phaseCenter = [3826594.027490,461153.953197,5064868.214000] +#PIC.Core.CS006D17.phaseCenter = [3826596.791490,461154.973197,5064866.047000] +#PIC.Core.CS006D18.phaseCenter = [3826600.770490,461153.865197,5064863.161000] +#PIC.Core.CS006D19.phaseCenter = [3826604.403490,461147.954198,5064860.968000] +#PIC.Core.CS006D20.phaseCenter = [3826604.912490,461144.768198,5064860.874000] +#PIC.Core.CS006D21.phaseCenter = [3826604.188490,461140.637198,5064861.791000] +#PIC.Core.CS006D22.phaseCenter = [3826601.686490,461137.921197,5064863.914000] +#PIC.Core.CS006D23.phaseCenter = [3826597.346490,461136.172197,5064867.330000] +#PIC.Core.CS006D24.phaseCenter 
= [3826593.514490,461136.346197,5064870.190000] +#PIC.Core.CS006D25.phaseCenter = [3826590.625490,461139.024197,5064872.117000] +#PIC.Core.CS006D26.phaseCenter = [3826589.657490,461142.828197,5064872.500000] +#PIC.Core.CS006D27.phaseCenter = [3826589.718490,461147.654197,5064872.017000] +#PIC.Core.CS006D28.phaseCenter = [3826591.359490,461151.718197,5064870.418000] +#PIC.Core.CS006D29.phaseCenter = [3826593.601490,461159.078197,5064868.070000] +#PIC.Core.CS006D30.phaseCenter = [3826596.554490,461159.869197,5064865.783000] +#PIC.Core.CS006D31.phaseCenter = [3826598.953490,461158.822197,5064864.076000] +#PIC.Core.CS006D32.phaseCenter = [3826604.222490,461153.948198,5064860.561000] +#PIC.Core.CS006D33.phaseCenter = [3826606.588490,461153.301198,5064858.844000] +#PIC.Core.CS006D34.phaseCenter = [3826607.378490,461145.851198,5064858.925000] +#PIC.Core.CS006D35.phaseCenter = [3826606.875490,461140.868198,5064859.753000] +#PIC.Core.CS006D36.phaseCenter = [3826606.561490,461135.611198,5064860.464000] +#PIC.Core.CS006D37.phaseCenter = [3826604.025490,461134.669198,5064862.452000] +#PIC.Core.CS006D38.phaseCenter = [3826600.618490,461132.789197,5064865.180000] +#PIC.Core.CS006D39.phaseCenter = [3826595.055490,461132.253197,5064869.404000] +#PIC.Core.CS006D40.phaseCenter = [3826592.828490,461132.608197,5064871.043000] +#PIC.Core.CS006D41.phaseCenter = [3826589.439490,461136.062197,5064873.275000] +#PIC.Core.CS006D42.phaseCenter = [3826586.428490,461138.026196,5064875.358000] +#PIC.Core.CS006D43.phaseCenter = [3826585.474490,461146.840196,5064875.277000] +#PIC.Core.CS006D44.phaseCenter = [3826586.311490,461150.614196,5064874.307000] +#PIC.Core.CS006D45.phaseCenter = [3826588.196490,461155.420196,5064872.458000] +#PIC.Core.CS006D46.phaseCenter = [3826629.969490,461178.945199,5064838.976000] +#PIC.Core.CS006D47.phaseCenter = [3826626.771500,461100.374199,5064848.479000] +#PIC.Core.CS007D00.phaseCenter = [3826533.404490,461098.918192,5064918.692000] +#PIC.Core.CS007D01.phaseCenter 
= [3826531.384490,461098.674192,5064920.230000] +#PIC.Core.CS007D02.phaseCenter = [3826532.065490,461101.023192,5064919.506000] +#PIC.Core.CS007D03.phaseCenter = [3826534.204490,461101.280192,5064917.878000] +#PIC.Core.CS007D04.phaseCenter = [3826535.423490,461099.161193,5064917.154000] +#PIC.Core.CS007D05.phaseCenter = [3826534.742490,461096.813193,5064917.878000] +#PIC.Core.CS007D06.phaseCenter = [3826532.604490,461096.555192,5064919.506000] +#PIC.Core.CS007D07.phaseCenter = [3826528.951490,461097.568192,5064922.156000] +#PIC.Core.CS007D08.phaseCenter = [3826529.140490,461101.500192,5064921.659000] +#PIC.Core.CS007D09.phaseCenter = [3826531.325490,461104.223192,5064919.773000] +#PIC.Core.CS007D10.phaseCenter = [3826534.481490,461104.465192,5064917.382000] +#PIC.Core.CS007D11.phaseCenter = [3826537.134490,461102.110193,5064915.604000] +#PIC.Core.CS007D12.phaseCenter = [3826538.041490,461098.262193,5064915.271000] +#PIC.Core.CS007D13.phaseCenter = [3826536.778490,461094.721193,5064916.538000] +#PIC.Core.CS007D14.phaseCenter = [3826533.937490,461093.144192,5064918.814000] +#PIC.Core.CS007D15.phaseCenter = [3826530.846490,461094.268192,5064921.032000] +#PIC.Core.CS007D16.phaseCenter = [3826528.761490,461091.953192,5064922.807000] +#PIC.Core.CS007D17.phaseCenter = [3826526.538490,461094.477192,5064924.247000] +#PIC.Core.CS007D18.phaseCenter = [3826525.135490,461099.276192,5064924.866000] +#PIC.Core.CS007D19.phaseCenter = [3826527.264490,461105.878192,5064922.671000] +#PIC.Core.CS007D20.phaseCenter = [3826529.204490,461107.873192,5064921.035000] +#PIC.Core.CS007D21.phaseCenter = [3826532.478490,461108.962192,5064918.479000] +#PIC.Core.CS007D22.phaseCenter = [3826535.737490,461107.492193,5064916.165000] +#PIC.Core.CS007D23.phaseCenter = [3826539.329490,461103.595193,5064913.822000] +#PIC.Core.CS007D24.phaseCenter = [3826541.303490,461099.378193,5064912.721000] +#PIC.Core.CS007D25.phaseCenter = [3826541.018490,461095.045193,5064913.326000] +#PIC.Core.CS007D26.phaseCenter 
= [3826538.898490,461092.275193,5064915.168000] +#PIC.Core.CS007D27.phaseCenter = [3826535.504490,461090.155193,5064917.908000] +#PIC.Core.CS007D28.phaseCenter = [3826531.776490,461090.087192,5064920.712000] +#PIC.Core.CS007D29.phaseCenter = [3826525.424490,461089.172192,5064925.563000] +#PIC.Core.CS007D30.phaseCenter = [3826523.258490,461092.002192,5064926.932000] +#PIC.Core.CS007D31.phaseCenter = [3826522.676490,461095.068192,5064927.092000] +#PIC.Core.CS007D32.phaseCenter = [3826523.188490,461102.967192,5064925.994000] +#PIC.Core.CS007D33.phaseCenter = [3826522.346490,461105.817192,5064926.369000] +#PIC.Core.CS007D34.phaseCenter = [3826527.102490,461110.045192,5064922.416000] +#PIC.Core.CS007D35.phaseCenter = [3826530.848490,461111.759192,5064919.450000] +#PIC.Core.CS007D36.phaseCenter = [3826534.681490,461113.802193,5064916.388000] +#PIC.Core.CS007D37.phaseCenter = [3826536.724490,461111.490193,5064915.064000] +#PIC.Core.CS007D38.phaseCenter = [3826539.896490,461108.662193,5064912.938000] +#PIC.Core.CS007D39.phaseCenter = [3826543.311490,461102.898193,5064910.896000] +#PIC.Core.CS007D40.phaseCenter = [3826544.282490,461100.331193,5064910.399000] +#PIC.Core.CS007D41.phaseCenter = [3826543.729490,461095.105193,5064911.286000] +#PIC.Core.CS007D42.phaseCenter = [3826544.009490,461090.964193,5064911.451000] +#PIC.Core.CS007D43.phaseCenter = [3826538.391490,461085.938193,5064916.121000] +#PIC.Core.CS007D44.phaseCenter = [3826535.306490,461085.134193,5064918.510000] +#PIC.Core.CS007D45.phaseCenter = [3826530.927490,461084.993192,5064921.810000] +#PIC.Core.CS007D46.phaseCenter = [3826523.497490,461153.009192,5064921.239000] +#PIC.Core.CS007D47.phaseCenter = [3826576.322490,461108.390196,5064885.621000] +#PIC.Core.CS002D00.position = [3826577.109500,461022.900196,5064892.758000] +#PIC.Core.CS002D01.position = [3826575.090500,461022.657196,5064894.296000] +#PIC.Core.CS002D02.position = [3826575.771500,461025.005196,5064893.572000] +#PIC.Core.CS002D03.position = 
[3826577.909500,461025.263196,5064891.944000] +#PIC.Core.CS002D04.position = [3826579.128500,461023.143196,5064891.220000] +#PIC.Core.CS002D05.position = [3826578.447500,461020.795196,5064891.944000] +#PIC.Core.CS002D06.position = [3826576.309500,461020.537196,5064893.572000] +#PIC.Core.CS002D07.position = [3826574.036500,461018.775195,5064895.437000] +#PIC.Core.CS002D08.position = [3826572.516500,461022.347195,5064896.256000] +#PIC.Core.CS002D09.position = [3826573.144500,461026.177195,5064895.438000] +#PIC.Core.CS002D10.position = [3826575.628500,461028.475196,5064893.365000] +#PIC.Core.CS002D11.position = [3826578.805500,461028.164196,5064891.009000] +#PIC.Core.CS002D12.position = [3826581.188500,461025.390196,5064889.471000] +#PIC.Core.CS002D13.position = [3826581.662500,461021.450196,5064889.471000] +#PIC.Core.CS002D14.position = [3826580.006500,461018.190196,5064891.009000] +#PIC.Core.CS002D15.position = [3826576.995500,461017.133196,5064893.365000] +#PIC.Core.CS002D16.position = [3826576.272500,461013.726196,5064894.215000] +#PIC.Core.CS002D17.position = [3826573.376500,461014.469195,5064896.322000] +#PIC.Core.CS002D18.position = [3826570.181500,461017.749195,5064898.424000] +#PIC.Core.CS002D19.position = [3826569.112500,461024.945195,5064898.576000] +#PIC.Core.CS002D20.position = [3826569.852500,461027.976195,5064897.746000] +#PIC.Core.CS002D21.position = [3826572.070500,461031.095195,5064895.800000] +#PIC.Core.CS002D22.position = [3826575.368500,461031.959196,5064893.246000] +#PIC.Core.CS002D23.position = [3826579.971500,461030.917196,5064889.885000] +#PIC.Core.CS002D24.position = [3826583.385500,461028.524196,5064887.539000] +#PIC.Core.CS002D25.position = [3826584.999500,461024.537196,5064886.688000] +#PIC.Core.CS002D26.position = [3826584.442500,461020.707196,5064887.452000] +#PIC.Core.CS002D27.position = [3826582.564500,461016.605196,5064889.232000] +#PIC.Core.CS002D28.position = [3826579.539500,461014.081196,5064891.731000] +#PIC.Core.CS002D29.position 
= [3826574.724500,461009.083195,5064895.797000] +#PIC.Core.CS002D30.position = [3826571.744500,461010.132195,5064897.939000] +#PIC.Core.CS002D31.position = [3826569.960500,461012.435195,5064899.070000] +#PIC.Core.CS002D32.position = [3826567.013500,461019.699195,5064900.626000] +#PIC.Core.CS002D33.position = [3826565.109500,461021.640195,5064901.880000] +#PIC.Core.CS002D34.position = [3826567.204500,461028.490195,5064899.688000] +#PIC.Core.CS002D35.position = [3826569.542500,461032.470195,5064897.573000] +#PIC.Core.CS002D36.position = [3826571.812500,461036.793195,5064895.478000] +#PIC.Core.CS002D37.position = [3826574.471500,461036.116195,5064893.544000] +#PIC.Core.CS002D38.position = [3826578.276500,461035.734196,5064890.722000] +#PIC.Core.CS002D39.position = [3826583.531500,461032.936196,5064887.030000] +#PIC.Core.CS002D40.position = [3826585.419500,461031.329196,5064885.758000] +#PIC.Core.CS002D41.position = [3826587.195500,461026.381196,5064884.873000] +#PIC.Core.CS002D42.position = [3826589.188500,461022.935197,5064883.688000] +#PIC.Core.CS002D43.position = [3826586.727500,461014.817196,5064886.269000] +#PIC.Core.CS002D44.position = [3826584.542500,461012.072196,5064888.157000] +#PIC.Core.CS002D45.position = [3826581.014500,461009.055196,5064891.078000] +#PIC.Core.CS002D46.position = [3826603.455510,460977.274198,5064877.107000] +#PIC.Core.CS002D47.position = [3826612.589500,461053.051198,5064863.401000] +##PIC.Core.CS003D00.position = [3826516.791510,460930.018191,5064946.429000] +#PIC.Core.CS003D01.position = [3826514.771510,460929.774191,5064947.967000] +#PIC.Core.CS003D02.position = [3826515.452500,460932.123191,5064947.243000] +#PIC.Core.CS003D03.position = [3826517.591510,460932.380191,5064945.615000] +#PIC.Core.CS003D04.position = [3826518.810510,460930.261191,5064944.891000] +#PIC.Core.CS003D05.position = [3826518.129510,460927.913191,5064945.615000] +#PIC.Core.CS003D06.position = [3826515.991510,460927.655191,5064947.243000] 
+#PIC.Core.CS003D07.position = [3826521.344510,460928.568192,5064943.142000] +#PIC.Core.CS003D08.position = [3826519.688510,460925.308191,5064944.680000] +#PIC.Core.CS003D09.position = [3826516.676510,460924.251191,5064947.036000] +#PIC.Core.CS003D10.position = [3826513.718510,460925.893191,5064949.109000] +#PIC.Core.CS003D11.position = [3826512.197510,460929.464191,5064949.927000] +#PIC.Core.CS003D12.position = [3826512.826500,460933.295191,5064949.109000] +#PIC.Core.CS003D13.position = [3826515.310500,460935.593191,5064947.037000] +#PIC.Core.CS003D14.position = [3826518.487500,460935.281191,5064944.680000] +#PIC.Core.CS003D15.position = [3826520.870510,460932.508192,5064943.142000] +#PIC.Core.CS003D16.position = [3826523.638510,460933.271192,5064940.995000] +#PIC.Core.CS003D17.position = [3826524.704510,460929.807192,5064940.509000] +#PIC.Core.CS003D18.position = [3826524.166510,460924.869192,5064941.358000] +#PIC.Core.CS003D19.position = [3826519.739510,460920.454191,5064945.080000] +#PIC.Core.CS003D20.position = [3826517.224510,460919.880191,5064947.020000] +#PIC.Core.CS003D21.position = [3826513.839510,460920.862191,5064949.472000] +#PIC.Core.CS003D22.position = [3826511.434510,460924.032191,5064950.991000] +#PIC.Core.CS003D23.position = [3826509.643500,460929.476191,5064951.844000] +#PIC.Core.CS003D24.position = [3826509.441500,460934.248191,5064951.563000] +#PIC.Core.CS003D25.position = [3826511.336500,460937.797191,5064949.821000] +#PIC.Core.CS003D26.position = [3826514.308500,460938.931191,5064947.487000] +#PIC.Core.CS003D27.position = [3826518.191500,460938.761191,5064944.588000] +#PIC.Core.CS003D28.position = [3826521.604500,460936.638192,5064942.218000] +#PIC.Core.CS003D29.position = [3826527.718510,460933.703192,5064937.894000] +#PIC.Core.CS003D30.position = [3826528.618510,460930.008192,5064937.552000] +#PIC.Core.CS003D31.position = [3826527.989510,460927.039192,5064938.293000] +#PIC.Core.CS003D32.position = 
[3826524.541510,460920.564192,5064941.466000] +#PIC.Core.CS003D33.position = [3826524.230510,460917.628192,5064941.965000] +#PIC.Core.CS003D34.position = [3826518.314510,460916.787191,5064946.482000] +#PIC.Core.CS003D35.position = [3826514.264510,460917.509191,5064949.456000] +#PIC.Core.CS003D36.position = [3826510.012510,460918.002191,5064952.604000] +#PIC.Core.CS003D37.position = [3826509.029510,460921.180191,5064953.054000] +#PIC.Core.CS003D38.position = [3826507.214510,460925.463191,5064954.029000] +#PIC.Core.CS003D39.position = [3826506.289500,460932.406191,5064954.096000] +#PIC.Core.CS003D40.position = [3826506.376500,460935.174191,5064953.780000] +#PIC.Core.CS003D41.position = [3826508.850500,460939.333191,5064951.547000] +#PIC.Core.CS003D42.position = [3826510.161500,460943.047191,5064950.228000] +#PIC.Core.CS003D43.position = [3826517.161500,460944.068191,5064944.882000] +#PIC.Core.CS003D44.position = [3826520.267500,460942.952192,5064942.651000] +#PIC.Core.CS003D45.position = [3826524.297500,460940.509192,5064939.846000] +#PIC.Core.CS003D46.position = [3826478.141510,460906.374188,5064977.577990] +#PIC.Core.CS003D47.position = [3826495.639500,460979.543190,5064957.829990] +#PIC.Core.CS004D00.position = [3826654.240510,460939.528201,5064842.397000] +#PIC.Core.CS004D01.position = [3826652.220510,460939.284201,5064843.935000] +#PIC.Core.CS004D02.position = [3826652.902510,460941.633201,5064843.211000] +#PIC.Core.CS004D03.position = [3826655.040510,460941.890201,5064841.583000] +#PIC.Core.CS004D04.position = [3826656.259510,460939.771202,5064840.859000] +#PIC.Core.CS004D05.position = [3826655.578510,460937.423201,5064841.583000] +#PIC.Core.CS004D06.position = [3826653.440510,460937.165201,5064843.211000] +#PIC.Core.CS004D07.position = [3826652.759510,460945.103201,5064843.004000] +#PIC.Core.CS004D08.position = [3826655.936510,460944.791202,5064840.648000] +#PIC.Core.CS004D09.position = [3826658.319510,460942.018202,5064839.110000] +#PIC.Core.CS004D10.position 
= [3826658.793510,460938.078202,5064839.110000] +#PIC.Core.CS004D11.position = [3826657.137510,460934.818202,5064840.648000] +#PIC.Core.CS004D12.position = [3826654.126510,460933.761201,5064843.004000] +#PIC.Core.CS004D13.position = [3826651.167510,460935.403201,5064845.076000] +#PIC.Core.CS004D14.position = [3826649.647510,460938.974201,5064845.895000] +#PIC.Core.CS004D15.position = [3826650.275510,460942.805201,5064845.077000] +#PIC.Core.CS004D16.position = [3826648.230510,460945.448201,5064846.373000] +#PIC.Core.CS004D17.position = [3826650.060510,460948.170201,5064844.753000] +#PIC.Core.CS004D18.position = [3826653.792510,460949.826201,5064841.802000] +#PIC.Core.CS004D19.position = [3826659.289510,460947.047202,5064837.928000] +#PIC.Core.CS004D20.position = [3826661.064510,460944.589202,5064836.817000] +#PIC.Core.CS004D21.position = [3826662.230510,460940.488202,5064836.312000] +#PIC.Core.CS004D22.position = [3826661.338510,460936.455202,5064837.347000] +#PIC.Core.CS004D23.position = [3826658.527510,460932.053202,5064839.855000] +#PIC.Core.CS004D24.position = [3826655.313510,460929.673201,5064842.482000] +#PIC.Core.CS004D25.position = [3826651.805510,460930.112201,5064845.075000] +#PIC.Core.CS004D26.position = [3826649.390510,460932.807201,5064846.645000] +#PIC.Core.CS004D27.position = [3826647.384510,460937.078201,5064847.764000] +#PIC.Core.CS004D28.position = [3826646.997510,460941.726201,5064847.635000] +#PIC.Core.CS004D29.position = [3826645.698510,460949.660201,5064847.893000] +#PIC.Core.CS004D30.position = [3826647.779510,460952.306201,5064846.092000] +#PIC.Core.CS004D31.position = [3826650.191510,460952.972201,5064844.221000] +#PIC.Core.CS004D32.position = [3826656.586510,460952.182202,5064839.492000] +#PIC.Core.CS004D33.position = [3826658.801510,460953.177202,5064837.740000] +#PIC.Core.CS004D34.position = [3826662.622510,460947.168202,5064835.415000] +#PIC.Core.CS004D35.position = [3826664.333510,460942.467202,5064834.555000] 
+#PIC.Core.CS004D36.position = [3826666.316510,460937.651202,5064833.502000] +#PIC.Core.CS004D37.position = [3826664.640510,460935.149202,5064834.986000] +#PIC.Core.CS004D38.position = [3826662.649510,460931.249202,5064836.833000] +#PIC.Core.CS004D39.position = [3826658.320510,460927.104202,5064840.458000] +#PIC.Core.CS004D40.position = [3826656.344510,460925.943202,5064842.045000] +#PIC.Core.CS004D41.position = [3826652.094510,460926.731201,5064845.164000] +#PIC.Core.CS004D42.position = [3826648.791510,460926.463201,5064847.668000] +#PIC.Core.CS004D43.position = [3826644.252510,460933.560201,5064850.433000] +#PIC.Core.CS004D44.position = [3826643.331510,460937.422201,5064850.775000] +#PIC.Core.CS004D45.position = [3826642.828510,460942.882201,5064850.660000] +#PIC.Core.CS004D46.position = [3826664.882510,460885.708202,5064839.274000] +#PIC.Core.CS004D47.position = [3826611.489510,460929.214198,5064875.419000] +#PIC.Core.CS005D00.position = [3826668.793500,461069.502202,5064819.725000] +#PIC.Core.CS005D01.position = [3826666.773500,461069.259202,5064821.263000] +#PIC.Core.CS005D02.position = [3826667.455500,461071.607202,5064820.540000] +#PIC.Core.CS005D03.position = [3826669.593500,461071.865202,5064818.911000] +#PIC.Core.CS005D04.position = [3826670.812500,461069.745203,5064818.188000] +#PIC.Core.CS005D05.position = [3826670.131500,461067.397202,5064818.911000] +#PIC.Core.CS005D06.position = [3826667.993500,461067.139202,5064820.539000] +#PIC.Core.CS005D07.position = [3826672.168500,461065.304203,5064817.572000] +#PIC.Core.CS005D08.position = [3826669.326500,461063.728202,5064819.847000] +#PIC.Core.CS005D09.position = [3826666.235500,461064.852202,5064822.066000] +#PIC.Core.CS005D10.position = [3826664.341500,461068.153202,5064823.189000] +#PIC.Core.CS005D11.position = [3826664.530500,461072.084202,5064822.692000] +#PIC.Core.CS005D12.position = [3826666.714500,461074.807202,5064820.806000] +#PIC.Core.CS005D13.position = 
[3826669.871500,461075.049202,5064818.415000] +#PIC.Core.CS005D14.position = [3826672.523500,461072.694203,5064816.637000] +#PIC.Core.CS005D15.position = [3826673.430500,461068.846203,5064816.304000] +#PIC.Core.CS005D16.position = [3826676.182500,461067.642203,5064814.347000] +#PIC.Core.CS005D17.position = [3826675.640500,461064.099203,5064815.074000] +#PIC.Core.CS005D18.position = [3826673.065500,461060.409203,5064817.341000] +#PIC.Core.CS005D19.position = [3826667.303500,461059.716202,5064821.728000] +#PIC.Core.CS005D20.position = [3826664.852500,461060.909202,5064823.460000] +#PIC.Core.CS005D21.position = [3826662.304500,461063.951202,5064825.098000] +#PIC.Core.CS005D22.position = [3826661.545500,461068.138202,5064825.289000] +#PIC.Core.CS005D23.position = [3826662.295500,461073.781202,5064824.216000] +#PIC.Core.CS005D24.position = [3826664.152500,461077.825202,5064822.456000] +#PIC.Core.CS005D25.position = [3826667.326500,461079.481202,5064819.925000] +#PIC.Core.CS005D26.position = [3826670.414500,461078.446203,5064817.700000] +#PIC.Core.CS005D27.position = [3826673.748500,461075.740203,5064815.443000] +#PIC.Core.CS005D28.position = [3826675.834500,461071.745203,5064814.237000] +#PIC.Core.CS005D29.position = [3826679.944500,461065.300203,5064811.735000] +#PIC.Core.CS005D30.position = [3826679.158500,461061.679203,5064812.652000] +#PIC.Core.CS005D31.position = [3826677.340500,461059.661203,5064814.199000] +#PIC.Core.CS005D32.position = [3826671.560500,461056.633203,5064818.812000] +#PIC.Core.CS005D33.position = [3826670.035500,461054.433202,5064820.155000] +#PIC.Core.CS005D34.position = [3826664.490500,461057.654202,5064824.026000] +#PIC.Core.CS005D35.position = [3826661.247500,461060.923202,5064826.165000] +#PIC.Core.CS005D36.position = [3826657.728500,461064.137202,5064828.516000] +#PIC.Core.CS005D37.position = [3826658.222500,461067.390202,5064827.851000] +#PIC.Core.CS005D38.position = [3826658.456500,461072.098202,5064827.250000] +#PIC.Core.CS005D39.position 
= [3826660.603500,461078.399202,5064825.068000] +#PIC.Core.CS005D40.position = [3826661.860500,461080.609202,5064823.926000] +#PIC.Core.CS005D41.position = [3826665.802500,461082.383202,5064820.807000] +#PIC.Core.CS005D42.position = [3826668.533500,461084.560202,5064818.560000] +#PIC.Core.CS005D43.position = [3826675.105500,461080.770203,5064813.969000] +#PIC.Core.CS005D44.position = [3826677.353500,461077.802203,5064812.550000] +#PIC.Core.CS005D45.position = [3826679.846500,461073.137203,5064811.100000] +#PIC.Core.CS005D46.position = [3826687.931510,461018.878204,5064809.936000] +#PIC.Core.CS005D47.position = [3826629.117500,461048.374199,5064851.417000] +#PIC.Core.CS006D00.position = [3826596.773490,461145.130197,5064866.950000] +#PIC.Core.CS006D01.position = [3826594.753490,461144.887197,5064868.488000] +#PIC.Core.CS006D02.position = [3826595.435490,461147.235197,5064867.764000] +#PIC.Core.CS006D03.position = [3826597.573490,461147.492197,5064866.136000] +#PIC.Core.CS006D04.position = [3826598.792490,461145.373197,5064865.412000] +#PIC.Core.CS006D05.position = [3826598.111490,461143.025197,5064866.136000] +#PIC.Core.CS006D06.position = [3826595.973490,461142.767197,5064867.764000] +#PIC.Core.CS006D07.position = [3826597.851490,461150.677197,5064865.640000] +#PIC.Core.CS006D08.position = [3826600.503490,461148.322197,5064863.862000] +#PIC.Core.CS006D09.position = [3826601.410490,461144.474197,5064863.529000] +#PIC.Core.CS006D10.position = [3826600.147490,461140.933197,5064864.796000] +#PIC.Core.CS006D11.position = [3826597.306490,461139.356197,5064867.072000] +#PIC.Core.CS006D12.position = [3826594.215490,461140.480197,5064869.291000] +#PIC.Core.CS006D13.position = [3826592.320490,461143.781197,5064870.414000] +#PIC.Core.CS006D14.position = [3826592.510490,461147.711197,5064869.917000] +#PIC.Core.CS006D15.position = [3826594.694490,461150.435197,5064868.031000] +#PIC.Core.CS006D16.position = [3826594.027490,461153.953197,5064868.214000] 
+#PIC.Core.CS006D17.position = [3826596.791490,461154.973197,5064866.047000] +#PIC.Core.CS006D18.position = [3826600.770490,461153.865197,5064863.161000] +#PIC.Core.CS006D19.position = [3826604.403490,461147.954198,5064860.968000] +#PIC.Core.CS006D20.position = [3826604.912490,461144.768198,5064860.874000] +#PIC.Core.CS006D21.position = [3826604.188490,461140.637198,5064861.791000] +#PIC.Core.CS006D22.position = [3826601.686490,461137.921197,5064863.914000] +#PIC.Core.CS006D23.position = [3826597.346490,461136.172197,5064867.330000] +#PIC.Core.CS006D24.position = [3826593.514490,461136.346197,5064870.190000] +#PIC.Core.CS006D25.position = [3826590.625490,461139.024197,5064872.117000] +#PIC.Core.CS006D26.position = [3826589.657490,461142.828197,5064872.500000] +#PIC.Core.CS006D27.position = [3826589.718490,461147.654197,5064872.017000] +#PIC.Core.CS006D28.position = [3826591.359490,461151.718197,5064870.418000] +#PIC.Core.CS006D29.position = [3826593.601490,461159.078197,5064868.070000] +#PIC.Core.CS006D30.position = [3826596.554490,461159.869197,5064865.783000] +#PIC.Core.CS006D31.position = [3826598.953490,461158.822197,5064864.076000] +#PIC.Core.CS006D32.position = [3826604.222490,461153.948198,5064860.561000] +#PIC.Core.CS006D33.position = [3826606.588490,461153.301198,5064858.844000] +#PIC.Core.CS006D34.position = [3826607.378490,461145.851198,5064858.925000] +#PIC.Core.CS006D35.position = [3826606.875490,461140.868198,5064859.753000] +#PIC.Core.CS006D36.position = [3826606.561490,461135.611198,5064860.464000] +#PIC.Core.CS006D37.position = [3826604.025490,461134.669198,5064862.452000] +#PIC.Core.CS006D38.position = [3826600.618490,461132.789197,5064865.180000] +#PIC.Core.CS006D39.position = [3826595.055490,461132.253197,5064869.404000] +#PIC.Core.CS006D40.position = [3826592.828490,461132.608197,5064871.043000] +#PIC.Core.CS006D41.position = [3826589.439490,461136.062197,5064873.275000] +#PIC.Core.CS006D42.position = 
[3826586.428490,461138.026196,5064875.358000] +#PIC.Core.CS006D43.position = [3826585.474490,461146.840196,5064875.277000] +#PIC.Core.CS006D44.position = [3826586.311490,461150.614196,5064874.307000] +#PIC.Core.CS006D45.position = [3826588.196490,461155.420196,5064872.458000] +#PIC.Core.CS006D46.position = [3826629.969490,461178.945199,5064838.976000] +#PIC.Core.CS006D47.position = [3826626.771500,461100.374199,5064848.479000] +#PIC.Core.CS007D00.position = [3826533.404490,461098.918192,5064918.692000] +#PIC.Core.CS007D01.position = [3826531.384490,461098.674192,5064920.230000] +#PIC.Core.CS007D02.position = [3826532.065490,461101.023192,5064919.506000] +#PIC.Core.CS007D03.position = [3826534.204490,461101.280192,5064917.878000] +#PIC.Core.CS007D04.position = [3826535.423490,461099.161193,5064917.154000] +#PIC.Core.CS007D05.position = [3826534.742490,461096.813193,5064917.878000] +#PIC.Core.CS007D06.position = [3826532.604490,461096.555192,5064919.506000] +#PIC.Core.CS007D07.position = [3826528.951490,461097.568192,5064922.156000] +#PIC.Core.CS007D08.position = [3826529.140490,461101.500192,5064921.659000] +#PIC.Core.CS007D09.position = [3826531.325490,461104.223192,5064919.773000] +#PIC.Core.CS007D10.position = [3826534.481490,461104.465192,5064917.382000] +#PIC.Core.CS007D11.position = [3826537.134490,461102.110193,5064915.604000] +#PIC.Core.CS007D12.position = [3826538.041490,461098.262193,5064915.271000] +#PIC.Core.CS007D13.position = [3826536.778490,461094.721193,5064916.538000] +#PIC.Core.CS007D14.position = [3826533.937490,461093.144192,5064918.814000] +#PIC.Core.CS007D15.position = [3826530.846490,461094.268192,5064921.032000] +#PIC.Core.CS007D16.position = [3826528.761490,461091.953192,5064922.807000] +#PIC.Core.CS007D17.position = [3826526.538490,461094.477192,5064924.247000] +#PIC.Core.CS007D18.position = [3826525.135490,461099.276192,5064924.866000] +#PIC.Core.CS007D19.position = [3826527.264490,461105.878192,5064922.671000] +#PIC.Core.CS007D20.position 
= [3826529.204490,461107.873192,5064921.035000] +#PIC.Core.CS007D21.position = [3826532.478490,461108.962192,5064918.479000] +#PIC.Core.CS007D22.position = [3826535.737490,461107.492193,5064916.165000] +#PIC.Core.CS007D23.position = [3826539.329490,461103.595193,5064913.822000] +#PIC.Core.CS007D24.position = [3826541.303490,461099.378193,5064912.721000] +#PIC.Core.CS007D25.position = [3826541.018490,461095.045193,5064913.326000] +#PIC.Core.CS007D26.position = [3826538.898490,461092.275193,5064915.168000] +#PIC.Core.CS007D27.position = [3826535.504490,461090.155193,5064917.908000] +#PIC.Core.CS007D28.position = [3826531.776490,461090.087192,5064920.712000] +#PIC.Core.CS007D29.position = [3826525.424490,461089.172192,5064925.563000] +#PIC.Core.CS007D30.position = [3826523.258490,461092.002192,5064926.932000] +#PIC.Core.CS007D31.position = [3826522.676490,461095.068192,5064927.092000] +#PIC.Core.CS007D32.position = [3826523.188490,461102.967192,5064925.994000] +#PIC.Core.CS007D33.position = [3826522.346490,461105.817192,5064926.369000] +#PIC.Core.CS007D34.position = [3826527.102490,461110.045192,5064922.416000] +#PIC.Core.CS007D35.position = [3826530.848490,461111.759192,5064919.450000] +#PIC.Core.CS007D36.position = [3826534.681490,461113.802193,5064916.388000] +#PIC.Core.CS007D37.position = [3826536.724490,461111.490193,5064915.064000] +#PIC.Core.CS007D38.position = [3826539.896490,461108.662193,5064912.938000] +#PIC.Core.CS007D39.position = [3826543.311490,461102.898193,5064910.896000] +#PIC.Core.CS007D40.position = [3826544.282490,461100.331193,5064910.399000] +#PIC.Core.CS007D41.position = [3826543.729490,461095.105193,5064911.286000] +#PIC.Core.CS007D42.position = [3826544.009490,461090.964193,5064911.451000] +#PIC.Core.CS007D43.position = [3826538.391490,461085.938193,5064916.121000] +#PIC.Core.CS007D44.position = [3826535.306490,461085.134193,5064918.510000] +#PIC.Core.CS007D45.position = [3826530.927490,461084.993192,5064921.810000] 
+#PIC.Core.CS007D46.position = [3826523.497490,461153.009192,5064921.239000] +#PIC.Core.CS007D47.position = [3826576.322490,461108.390196,5064885.621000] +# LBA_OUTER +PIC.Core.CS002D00.phaseCenter = [3826579.492500,461005.105196,5064892.578000] +PIC.Core.CS002D01.phaseCenter = [3826578.065500,461002.706196,5064893.866000] +PIC.Core.CS002D02.phaseCenter = [3826566.278500,461008.429195,5064902.196000] +PIC.Core.CS002D03.phaseCenter = [3826561.238500,461023.545195,5064904.613000] +PIC.Core.CS002D04.phaseCenter = [3826561.048500,461028.462194,5064904.311000] +PIC.Core.CS002D05.phaseCenter = [3826567.371500,461037.218195,5064898.774000] +PIC.Core.CS002D06.phaseCenter = [3826577.471500,461040.701196,5064890.878000] +PIC.Core.CS002D07.phaseCenter = [3826583.820500,461041.021196,5064886.083000] +PIC.Core.CS002D08.phaseCenter = [3826590.492500,461028.842197,5064882.175000] +PIC.Core.CS002D09.phaseCenter = [3826593.010500,461021.160197,5064880.980000] +PIC.Core.CS002D10.phaseCenter = [3826589.881500,461012.254197,5064884.134000] +PIC.Core.CS002D11.phaseCenter = [3826590.888500,461004.124197,5064884.112000] +PIC.Core.CS002D12.phaseCenter = [3826581.799500,460998.392196,5064891.453000] +PIC.Core.CS002D13.phaseCenter = [3826571.486500,460998.986195,5064899.140000] +PIC.Core.CS002D14.phaseCenter = [3826566.799500,461001.550195,5064902.426000] +PIC.Core.CS002D15.phaseCenter = [3826561.056500,461006.369194,5064906.301000] +PIC.Core.CS002D16.phaseCenter = [3826557.923500,461023.707194,5064907.087000] +PIC.Core.CS002D17.phaseCenter = [3826558.452500,461031.588194,5064905.977000] +PIC.Core.CS002D18.phaseCenter = [3826563.447500,461043.574195,5064901.145000] +PIC.Core.CS002D19.phaseCenter = [3826574.094500,461047.864195,5064892.765000] +PIC.Core.CS002D20.phaseCenter = [3826581.534500,461047.956196,5064887.172000] +PIC.Core.CS002D21.phaseCenter = [3826589.542500,461041.302197,5064881.763000] +PIC.Core.CS002D22.phaseCenter = [3826597.988500,461028.593197,5064876.571000] 
+PIC.Core.CS002D23.phaseCenter = [3826597.031500,461011.026197,5064878.878000] +PIC.Core.CS002D24.phaseCenter = [3826596.814500,460997.283197,5064880.282000] +PIC.Core.CS002D25.phaseCenter = [3826585.611500,460992.916196,5064889.087000] +PIC.Core.CS002D26.phaseCenter = [3826573.967500,460992.191195,5064897.892000] +PIC.Core.CS002D27.phaseCenter = [3826567.574500,460993.250195,5064902.595000] +PIC.Core.CS002D28.phaseCenter = [3826556.250500,461006.503194,5064909.897000] +PIC.Core.CS002D29.phaseCenter = [3826549.777500,461023.012194,5064913.263000] +PIC.Core.CS002D30.phaseCenter = [3826551.123500,461035.050194,5064911.165000] +PIC.Core.CS002D31.phaseCenter = [3826556.956500,461040.527194,5064906.292000] +PIC.Core.CS002D32.phaseCenter = [3826562.717500,461052.851195,5064900.854000] +PIC.Core.CS002D33.phaseCenter = [3826577.449500,461056.665196,5064889.451000] +PIC.Core.CS002D34.phaseCenter = [3826584.539500,461054.769196,5064884.300000] +PIC.Core.CS002D35.phaseCenter = [3826595.997500,461046.957197,5064876.406000] +PIC.Core.CS002D36.phaseCenter = [3826598.671500,461039.287197,5064875.092000] +PIC.Core.CS002D37.phaseCenter = [3826603.738500,461019.411198,5064873.085000] +PIC.Core.CS002D38.phaseCenter = [3826600.940500,461013.887197,5064875.685000] +PIC.Core.CS002D39.phaseCenter = [3826605.295500,461010.467198,5064872.725000] +PIC.Core.CS002D40.phaseCenter = [3826585.073500,460981.955196,5064890.481000] +PIC.Core.CS002D41.phaseCenter = [3826564.739500,460986.971195,5064905.291000] +PIC.Core.CS002D42.phaseCenter = [3826551.009500,460998.299194,5064914.572000] +PIC.Core.CS002D43.phaseCenter = [3826546.172500,461013.473193,5064916.832000] +PIC.Core.CS002D44.phaseCenter = [3826556.131500,461055.945194,5064905.517000] +PIC.Core.CS002D45.phaseCenter = [3826573.680500,461059.292195,5064892.043000] +PIC.Core.CS002D46.phaseCenter = [3826586.498500,461061.863196,5064882.189000] +PIC.Core.CS002D47.phaseCenter = [3826607.822500,461026.590198,5064869.371000] 
+PIC.Core.CS003D00.phaseCenter = [3826527.881500,460940.655192,5064937.143000] +PIC.Core.CS003D01.phaseCenter = [3826530.332500,460940.201192,5064935.344000] +PIC.Core.CS003D02.phaseCenter = [3826532.793510,460924.878192,5064934.882000] +PIC.Core.CS003D03.phaseCenter = [3826525.021510,460912.584192,5064941.827000] +PIC.Core.CS003D04.phaseCenter = [3826521.701510,460910.152192,5064944.539000] +PIC.Core.CS003D05.phaseCenter = [3826512.144510,460913.012191,5064951.454000] +PIC.Core.CS003D06.phaseCenter = [3826504.196510,460922.342190,5064956.577000] +PIC.Core.CS003D07.phaseCenter = [3826500.500500,460929.054190,5064958.744000] +PIC.Core.CS003D08.phaseCenter = [3826505.334500,460941.780190,5064953.966000] +PIC.Core.CS003D09.phaseCenter = [3826509.307500,460947.981191,5064950.423000] +PIC.Core.CS003D10.phaseCenter = [3826517.220500,460948.636191,5064944.424000] +PIC.Core.CS003D11.phaseCenter = [3826522.331500,460953.408192,5064940.156000] +PIC.Core.CS003D12.phaseCenter = [3826531.295500,460946.188192,5064934.080000] +PIC.Core.CS003D13.phaseCenter = [3826536.521510,460934.781193,5064931.189000] +PIC.Core.CS003D14.phaseCenter = [3826537.298510,460928.557193,5064931.168000] +PIC.Core.CS003D15.phaseCenter = [3826537.082510,460920.171193,5064932.088000] +PIC.Core.CS003D16.phaseCenter = [3826526.721510,460908.930192,5064940.881000] +PIC.Core.CS003D17.phaseCenter = [3826520.943510,460905.931192,5064945.489000] +PIC.Core.CS003D18.phaseCenter = [3826509.863510,460905.894191,5064953.809000] +PIC.Core.CS003D19.phaseCenter = [3826501.054510,460915.450190,5064959.558000] +PIC.Core.CS003D20.phaseCenter = [3826496.920500,460923.443190,5064961.938000] +PIC.Core.CS003D21.phaseCenter = [3826497.175500,460935.107190,5064960.693000] +PIC.Core.CS003D22.phaseCenter = [3826501.408500,460949.988190,5064956.171000] +PIC.Core.CS003D23.phaseCenter = [3826514.166500,460956.915191,5064945.968000] +PIC.Core.CS003D24.phaseCenter = [3826523.855500,460962.908192,5064938.154000] 
+PIC.Core.CS003D25.phaseCenter = [3826533.025500,460952.786192,5064932.186000] +PIC.Core.CS003D26.phaseCenter = [3826539.897510,460940.539193,5064928.134000] +PIC.Core.CS003D27.phaseCenter = [3826542.655510,460933.155193,5064926.731000] +PIC.Core.CS003D28.phaseCenter = [3826539.617510,460914.920193,5064930.660000] +PIC.Core.CS003D29.phaseCenter = [3826531.660510,460900.448192,5064937.941000] +PIC.Core.CS003D30.phaseCenter = [3826522.539510,460896.447192,5064945.148000] +PIC.Core.CS003D31.phaseCenter = [3826515.535510,460900.265191,5064950.061000] +PIC.Core.CS003D32.phaseCenter = [3826503.801510,460900.903190,5064958.811000] +PIC.Core.CS003D33.phaseCenter = [3826493.089510,460915.084190,5064965.570000] +PIC.Core.CS003D34.phaseCenter = [3826490.532500,460923.602189,5064966.718990] +PIC.Core.CS003D35.phaseCenter = [3826489.706500,460939.517189,5064965.900990] +PIC.Core.CS003D36.phaseCenter = [3826493.586500,460945.880190,5064962.412990] +PIC.Core.CS003D37.phaseCenter = [3826504.658500,460960.358190,5064952.794000] +PIC.Core.CS003D38.phaseCenter = [3826510.036500,460959.840191,5064948.804000] +PIC.Core.CS003D39.phaseCenter = [3826510.036500,460966.093191,5064948.239000] +PIC.Core.CS003D40.phaseCenter = [3826540.952500,460957.174193,5064925.839000] +PIC.Core.CS003D41.phaseCenter = [3826548.578510,460932.938194,5064922.305000] +PIC.Core.CS003D42.phaseCenter = [3826548.197510,460912.976194,5064924.395000] +PIC.Core.CS003D43.phaseCenter = [3826540.275510,460900.877193,5064931.436000] +PIC.Core.CS003D44.phaseCenter = [3826505.248510,460892.388190,5064958.494000] +PIC.Core.CS003D45.phaseCenter = [3826493.321510,460909.823190,5064965.871000] +PIC.Core.CS003D46.phaseCenter = [3826484.521500,460922.503189,5064971.330990] +PIC.Core.CS003D47.phaseCenter = [3826497.425500,460961.517190,5064958.119000] +PIC.Core.CS004D00.phaseCenter = [3826640.767510,460946.686200,5064851.863000] +PIC.Core.CS004D01.phaseCenter = [3826639.743510,460949.538200,5064852.374000] 
+PIC.Core.CS004D02.phaseCenter = [3826649.069510,460959.139201,5064844.506000] +PIC.Core.CS004D03.phaseCenter = [3826661.881510,460956.316202,5064835.144000] +PIC.Core.CS004D04.phaseCenter = [3826665.390510,460953.832202,5064832.734000] +PIC.Core.CS004D05.phaseCenter = [3826668.624510,460942.216202,5064831.357000] +PIC.Core.CS004D06.phaseCenter = [3826666.474510,460929.403202,5064834.129000] +PIC.Core.CS004D07.phaseCenter = [3826663.820510,460922.371202,5064836.757000] +PIC.Core.CS004D08.phaseCenter = [3826652.313510,460921.824201,5064845.443000] +PIC.Core.CS004D09.phaseCenter = [3826645.822510,460923.306201,5064850.182000] +PIC.Core.CS004D10.phaseCenter = [3826641.039510,460931.555200,5064853.027000] +PIC.Core.CS004D11.phaseCenter = [3826634.920510,460934.914200,5064857.316000] +PIC.Core.CS004D12.phaseCenter = [3826635.046510,460947.866200,5064856.050000] +PIC.Core.CS004D13.phaseCenter = [3826640.133510,460958.679200,5064851.255000] +PIC.Core.CS004D14.phaseCenter = [3826644.043510,460962.338201,5064847.989000] +PIC.Core.CS004D15.phaseCenter = [3826650.001510,460965.905201,5064843.194000] +PIC.Core.CS004D16.phaseCenter = [3826663.496510,460959.809202,5064833.616000] +PIC.Core.CS004D17.phaseCenter = [3826668.745510,460954.927202,5064830.117000] +PIC.Core.CS004D18.phaseCenter = [3826674.830510,460942.977203,5064826.630000] +PIC.Core.CS004D19.phaseCenter = [3826672.992510,460929.132203,5064829.261000] +PIC.Core.CS004D20.phaseCenter = [3826669.685510,460921.047203,5064832.474000] +PIC.Core.CS004D21.phaseCenter = [3826661.423510,460916.036202,5064839.129000] +PIC.Core.CS004D22.phaseCenter = [3826648.744510,460913.865201,5064848.842000] +PIC.Core.CS004D23.phaseCenter = [3826636.943510,460924.505200,5064856.738000] +PIC.Core.CS004D24.phaseCenter = [3826627.470510,460932.254199,5064863.148000] +PIC.Core.CS004D25.phaseCenter = [3826629.505510,460946.744200,5064860.311000] +PIC.Core.CS004D26.phaseCenter = [3826634.276510,460959.716200,5064855.557000] 
+PIC.Core.CS004D27.phaseCenter = [3826637.911510,460966.041200,5064852.258000] +PIC.Core.CS004D28.phaseCenter = [3826652.272510,460971.023201,5064841.027000] +PIC.Core.CS004D29.phaseCenter = [3826666.703510,460968.985202,5064830.380000] +PIC.Core.CS004D30.phaseCenter = [3826674.477510,460960.948203,5064825.271000] +PIC.Core.CS004D31.phaseCenter = [3826675.648510,460951.654203,5064825.232000] +PIC.Core.CS004D32.phaseCenter = [3826681.621510,460938.692203,5064821.920000] +PIC.Core.CS004D33.phaseCenter = [3826677.602510,460920.696203,5064826.563000] +PIC.Core.CS004D34.phaseCenter = [3826673.068510,460914.075203,5064830.565000] +PIC.Core.CS004D35.phaseCenter = [3826662.436510,460905.971202,5064839.278000] +PIC.Core.CS004D36.phaseCenter = [3826655.882510,460907.279202,5064844.079000] +PIC.Core.CS004D37.phaseCenter = [3826639.744510,460912.676200,5064855.705000] +PIC.Core.CS004D38.phaseCenter = [3826637.164510,460918.719200,5064857.095000] +PIC.Core.CS004D39.phaseCenter = [3826632.809510,460915.885200,5064860.620000] +PIC.Core.CS004D40.phaseCenter = [3826622.115510,460953.317199,5064865.264000] +PIC.Core.CS004D41.phaseCenter = [3826634.822510,460972.536200,5064853.989000] +PIC.Core.CS004D42.phaseCenter = [3826648.933510,460981.170201,5064842.617000] +PIC.Core.CS004D43.phaseCenter = [3826661.693510,460978.096202,5064833.316000] +PIC.Core.CS004D44.phaseCenter = [3826686.760510,460944.113204,5064817.572000] +PIC.Core.CS004D45.phaseCenter = [3826681.140510,460923.330203,5064823.669000] +PIC.Core.CS004D46.phaseCenter = [3826677.121510,460908.080203,5064828.064000] +PIC.Core.CS004D47.phaseCenter = [3826642.893510,460904.339201,5064854.095000] +PIC.Core.CS005D00.phaseCenter = [3826683.049500,461070.888203,5064808.900000] +PIC.Core.CS005D01.phaseCenter = [3826685.005500,461068.896204,5064807.611000] +PIC.Core.CS005D02.phaseCenter = [3826680.632500,461054.715203,5064812.176000] +PIC.Core.CS005D03.phaseCenter = [3826668.579500,461049.779202,5064821.668000] 
+PIC.Core.CS005D04.phaseCenter = [3826664.631500,461049.979202,5064824.614000] +PIC.Core.CS005D05.phaseCenter = [3826657.472500,461058.639202,5064829.205000] +PIC.Core.CS005D06.phaseCenter = [3826654.480500,461071.536201,5064830.285000] +PIC.Core.CS005D07.phaseCenter = [3826654.101500,461079.478201,5064829.852000] +PIC.Core.CS005D08.phaseCenter = [3826663.761500,461086.710202,5064821.947000] +PIC.Core.CS005D09.phaseCenter = [3826669.887500,461089.165202,5064817.127000] +PIC.Core.CS005D10.phaseCenter = [3826677.104500,461084.473203,5064812.134000] +PIC.Core.CS005D11.phaseCenter = [3826683.619500,461085.005203,5064807.196000] +PIC.Core.CS005D12.phaseCenter = [3826688.400500,461073.167204,5064804.677000] +PIC.Core.CS005D13.phaseCenter = [3826688.121500,461060.365204,5064806.043000] +PIC.Core.CS005D14.phaseCenter = [3826686.149500,461054.752204,5064808.031000] +PIC.Core.CS005D15.phaseCenter = [3826682.387500,461048.024203,5064811.463000] +PIC.Core.CS005D16.phaseCenter = [3826668.512500,461045.661202,5064822.091000] +PIC.Core.CS005D17.phaseCenter = [3826662.169500,461047.021202,5064826.729000] +PIC.Core.CS005D18.phaseCenter = [3826652.439500,461054.314201,5064833.374000] +PIC.Core.CS005D19.phaseCenter = [3826648.787500,461067.965201,5064834.881000] +PIC.Core.CS005D20.phaseCenter = [3826648.570500,461077.246201,5064834.205000] +PIC.Core.CS005D21.phaseCenter = [3826653.765500,461086.635201,5064829.457000] +PIC.Core.CS005D22.phaseCenter = [3826663.817500,461096.031202,5064821.063000] +PIC.Core.CS005D23.phaseCenter = [3826677.955500,461093.274203,5064810.700000] +PIC.Core.CS005D24.phaseCenter = [3826689.003500,461091.782204,5064802.541000] +PIC.Core.CS005D25.phaseCenter = [3826692.729500,461077.430204,5064801.042000] +PIC.Core.CS005D26.phaseCenter = [3826693.534500,461062.853204,5064801.755000] +PIC.Core.CS005D27.phaseCenter = [3826692.806500,461054.979204,5064803.014000] +PIC.Core.CS005D28.phaseCenter = [3826682.372500,461042.045203,5064812.015000] 
+PIC.Core.CS005D29.phaseCenter = [3826669.227500,461035.447202,5064822.477000] +PIC.Core.CS005D30.phaseCenter = [3826659.527500,461038.196202,5064829.511000] +PIC.Core.CS005D31.phaseCenter = [3826655.013500,461045.953201,5064832.198000] +PIC.Core.CS005D32.phaseCenter = [3826644.998500,461054.230201,5064838.967000] +PIC.Core.CS005D33.phaseCenter = [3826641.649500,461072.931200,5064839.790000] +PIC.Core.CS005D34.phaseCenter = [3826643.037500,461081.599200,5064837.965000] +PIC.Core.CS005D35.phaseCenter = [3826649.096500,461095.184201,5064832.189000] +PIC.Core.CS005D36.phaseCenter = [3826655.209500,461097.834201,5064827.361000] +PIC.Core.CS005D37.phaseCenter = [3826671.086500,461102.380203,5064815.033000] +PIC.Core.CS005D38.phaseCenter = [3826675.580500,461098.401203,5064812.019000] +PIC.Core.CS005D39.phaseCenter = [3826678.245500,461103.525203,5064809.556000] +PIC.Core.CS005D40.phaseCenter = [3826701.548500,461075.785205,5064794.571000] +PIC.Core.CS005D41.phaseCenter = [3826697.907500,461050.887205,5064799.555000] +PIC.Core.CS005D42.phaseCenter = [3826689.066500,461034.784204,5064807.646000] +PIC.Core.CS005D43.phaseCenter = [3826676.963500,461030.104203,5064817.154000] +PIC.Core.CS005D44.phaseCenter = [3826642.637500,461046.296200,5064841.456000] +PIC.Core.CS005D45.phaseCenter = [3826639.610500,461068.466200,5064841.724000] +PIC.Core.CS005D46.phaseCenter = [3826637.298500,461084.670200,5064841.995000] +PIC.Core.CS005D47.phaseCenter = [3826665.238500,461108.110202,5064818.904000] +PIC.Core.CS006D00.phaseCenter = [3826588.011490,461159.899196,5064872.192000] +PIC.Core.CS006D01.phaseCenter = [3826588.329490,461162.913196,5064871.681000] +PIC.Core.CS006D02.phaseCenter = [3826600.597490,461164.616197,5064862.319000] +PIC.Core.CS006D03.phaseCenter = [3826610.626490,461153.837198,5064855.765000] +PIC.Core.CS006D04.phaseCenter = [3826612.645490,461149.482198,5064854.643000] +PIC.Core.CS006D05.phaseCenter = [3826610.530490,461137.826198,5064857.284000] 
+PIC.Core.CS006D06.phaseCenter = [3826603.184490,461128.748198,5064863.619000] +PIC.Core.CS006D07.phaseCenter = [3826597.860490,461124.740197,5064867.977000] +PIC.Core.CS006D08.phaseCenter = [3826587.540490,461131.897196,5064875.077000] +PIC.Core.CS006D09.phaseCenter = [3826582.480490,461137.401196,5064878.377000] +PIC.Core.CS006D10.phaseCenter = [3826581.802490,461147.323196,5064877.990000] +PIC.Core.CS006D11.phaseCenter = [3826577.868490,461154.117196,5064880.328000] +PIC.Core.CS006D12.phaseCenter = [3826583.498490,461164.646196,5064875.150000] +PIC.Core.CS006D13.phaseCenter = [3826592.566490,461170.144197,5064867.847000] +PIC.Core.CS006D14.phaseCenter = [3826597.553490,461170.559197,5064864.066000] +PIC.Core.CS006D15.phaseCenter = [3826604.298490,461169.544198,5064859.095000] +PIC.Core.CS006D16.phaseCenter = [3826613.531490,461155.630198,5064853.422000] +PIC.Core.CS006D17.phaseCenter = [3826616.052490,461148.162198,5064852.205000] +PIC.Core.CS006D18.phaseCenter = [3826616.294490,461134.350198,5064853.271000] +PIC.Core.CS006D19.phaseCenter = [3826608.784490,461124.219198,5064859.825000] +PIC.Core.CS006D20.phaseCenter = [3826602.439490,461119.780197,5064864.989000] +PIC.Core.CS006D21.phaseCenter = [3826593.059490,461121.135197,5064871.907000] +PIC.Core.CS006D22.phaseCenter = [3826581.018490,461127.734196,5064880.348000] +PIC.Core.CS006D23.phaseCenter = [3826575.206490,461144.252195,5064883.218000] +PIC.Core.CS006D24.phaseCenter = [3826570.204490,461156.862195,5064885.833000] +PIC.Core.CS006D25.phaseCenter = [3826578.162490,461167.388196,5064878.908000] +PIC.Core.CS006D26.phaseCenter = [3826587.873490,461174.865196,5064870.943000] +PIC.Core.CS006D27.phaseCenter = [3826593.755490,461177.645197,5064866.276000] +PIC.Core.CS006D28.phaseCenter = [3826608.469490,461172.237198,5064855.721000] +PIC.Core.CS006D29.phaseCenter = [3826620.253490,461161.031199,5064847.889000] +PIC.Core.CS006D30.phaseCenter = [3826623.644490,461149.308199,5064846.403000] 
+PIC.Core.CS006D31.phaseCenter = [3826620.710490,461140.918199,5064849.364000] +PIC.Core.CS006D32.phaseCenter = [3826620.422490,461126.350199,5064850.896000] +PIC.Core.CS006D33.phaseCenter = [3826609.230500,461114.260198,5064860.390000] +PIC.Core.CS006D34.phaseCenter = [3826602.433500,461111.832197,5064865.712000] +PIC.Core.CS006D35.phaseCenter = [3826589.658490,461112.218197,5064875.266000] +PIC.Core.CS006D36.phaseCenter = [3826584.469490,461117.621196,5064878.672000] +PIC.Core.CS006D37.phaseCenter = [3826572.621490,461132.708195,5064886.202000] +PIC.Core.CS006D38.phaseCenter = [3826572.934490,461139.365195,5064885.365000] +PIC.Core.CS006D39.phaseCenter = [3826567.908490,461139.921195,5064889.087000] +PIC.Core.CS006D40.phaseCenter = [3826574.484490,461177.659195,5064880.740000] +PIC.Core.CS006D41.phaseCenter = [3826593.816490,461185.008197,5064865.565000] +PIC.Core.CS006D42.phaseCenter = [3826609.866490,461182.757198,5064853.721000] +PIC.Core.CS006D43.phaseCenter = [3826619.744490,461171.806199,5064847.297000] +PIC.Core.CS006D44.phaseCenter = [3826627.238500,461127.396199,5064845.686000] +PIC.Core.CS006D45.phaseCenter = [3826613.453500,461114.081198,5064857.236000] +PIC.Core.CS006D46.phaseCenter = [3826603.432500,461104.241198,5064865.647000] +PIC.Core.CS006D47.phaseCenter = [3826571.828490,461123.796195,5064887.603000] +PIC.Core.CS007D00.phaseCenter = [3826527.909490,461082.763192,5064924.277000] +PIC.Core.CS007D01.phaseCenter = [3826525.636490,461081.740192,5064926.075000] +PIC.Core.CS007D02.phaseCenter = [3826517.741490,461094.218191,5064930.874000] +PIC.Core.CS007D03.phaseCenter = [3826519.764490,461109.935191,5064927.935000] +PIC.Core.CS007D04.phaseCenter = [3826521.693490,461114.089192,5064926.111000] +PIC.Core.CS007D05.phaseCenter = [3826530.968490,461117.085192,5064918.878000] +PIC.Core.CS007D06.phaseCenter = [3826541.306490,461113.264193,5064911.463000] +PIC.Core.CS007D07.phaseCenter = [3826547.009490,461109.331193,5064907.538000] 
+PIC.Core.CS007D08.phaseCenter = [3826547.669490,461094.942193,5064908.344000] +PIC.Core.CS007D09.phaseCenter = [3826546.603490,461086.983193,5064909.863000] +PIC.Core.CS007D10.phaseCenter = [3826540.064500,461081.754193,5064915.244000] +PIC.Core.CS007D11.phaseCenter = [3826537.483500,461074.428193,5064917.844000] +PIC.Core.CS007D12.phaseCenter = [3826527.071490,461075.738192,5064925.540000] +PIC.Core.CS007D13.phaseCenter = [3826518.282490,461083.039191,5064931.477000] +PIC.Core.CS007D14.phaseCenter = [3826515.266490,461088.238191,5064933.272000] +PIC.Core.CS007D15.phaseCenter = [3826512.285490,461095.982191,5064934.809000] +PIC.Core.CS007D16.phaseCenter = [3826516.926490,461112.259191,5064929.854000] +PIC.Core.CS007D17.phaseCenter = [3826520.749490,461118.367191,5064926.433000] +PIC.Core.CS007D18.phaseCenter = [3826530.236490,461124.886192,5064918.722000] +PIC.Core.CS007D19.phaseCenter = [3826541.398490,461121.365193,5064910.662000] +PIC.Core.CS007D20.phaseCenter = [3826547.961490,461116.524193,5064906.174000] +PIC.Core.CS007D21.phaseCenter = [3826552.146490,461105.780194,5064904.004000] +PIC.Core.CS007D22.phaseCenter = [3826554.135490,461089.784194,5064903.957000] +PIC.Core.CS007D23.phaseCenter = [3826545.809500,461076.022193,5064911.449000] +PIC.Core.CS007D24.phaseCenter = [3826539.762500,461064.905193,5064916.993000] +PIC.Core.CS007D25.phaseCenter = [3826528.079500,461068.731192,5064925.417000] +PIC.Core.CS007D26.phaseCenter = [3826517.561490,461075.832191,5064932.670000] +PIC.Core.CS007D27.phaseCenter = [3826512.409490,461080.924191,5064936.078000] +PIC.Core.CS007D28.phaseCenter = [3826508.128490,461099.268191,5064937.631990] +PIC.Core.CS007D29.phaseCenter = [3826509.489490,461117.072191,5064935.001990] +PIC.Core.CS007D30.phaseCenter = [3826515.799490,461126.046191,5064929.454000] +PIC.Core.CS007D31.phaseCenter = [3826523.247490,461126.679192,5064923.806000] +PIC.Core.CS007D32.phaseCenter = [3826533.550490,461132.970192,5064915.505000] 
+PIC.Core.CS007D33.phaseCenter = [3826548.091490,461126.360193,5064905.187000] +PIC.Core.CS007D34.phaseCenter = [3826553.499490,461120.120194,5064901.692000] +PIC.Core.CS007D35.phaseCenter = [3826560.216490,461106.147194,5064897.913000] +PIC.Core.CS007D36.phaseCenter = [3826559.291490,461098.095194,5064899.335000] +PIC.Core.CS007D37.phaseCenter = [3826555.263500,461078.462194,5064904.133000] +PIC.Core.CS007D38.phaseCenter = [3826550.456500,461075.783194,5064907.984000] +PIC.Core.CS007D39.phaseCenter = [3826552.817500,461070.104194,5064906.725000] +PIC.Core.CS007D40.phaseCenter = [3826522.937500,461060.105192,5064930.056000] +PIC.Core.CS007D41.phaseCenter = [3826507.247490,461077.654191,5064940.246990] +PIC.Core.CS007D42.phaseCenter = [3826500.037490,461096.010190,5064943.999990] +PIC.Core.CS007D43.phaseCenter = [3826502.263490,461111.639190,5064940.916990] +PIC.Core.CS007D44.phaseCenter = [3826529.095490,461139.857192,5064918.226000] +PIC.Core.CS007D45.phaseCenter = [3826545.906490,461131.003193,5064906.408000] +PIC.Core.CS007D46.phaseCenter = [3826558.240490,461124.638194,5064897.725000] +PIC.Core.CS007D47.phaseCenter = [3826561.903500,461081.644195,5064898.861000] +PIC.Core.CS002D00.position = [3826579.492500,461005.105196,5064892.578000] +PIC.Core.CS002D01.position = [3826578.065500,461002.706196,5064893.866000] +PIC.Core.CS002D02.position = [3826566.278500,461008.429195,5064902.196000] +PIC.Core.CS002D03.position = [3826561.238500,461023.545195,5064904.613000] +PIC.Core.CS002D04.position = [3826561.048500,461028.462194,5064904.311000] +PIC.Core.CS002D05.position = [3826567.371500,461037.218195,5064898.774000] +PIC.Core.CS002D06.position = [3826577.471500,461040.701196,5064890.878000] +PIC.Core.CS002D07.position = [3826583.820500,461041.021196,5064886.083000] +PIC.Core.CS002D08.position = [3826590.492500,461028.842197,5064882.175000] +PIC.Core.CS002D09.position = [3826593.010500,461021.160197,5064880.980000] +PIC.Core.CS002D10.position = 
[3826589.881500,461012.254197,5064884.134000] +PIC.Core.CS002D11.position = [3826590.888500,461004.124197,5064884.112000] +PIC.Core.CS002D12.position = [3826581.799500,460998.392196,5064891.453000] +PIC.Core.CS002D13.position = [3826571.486500,460998.986195,5064899.140000] +PIC.Core.CS002D14.position = [3826566.799500,461001.550195,5064902.426000] +PIC.Core.CS002D15.position = [3826561.056500,461006.369194,5064906.301000] +PIC.Core.CS002D16.position = [3826557.923500,461023.707194,5064907.087000] +PIC.Core.CS002D17.position = [3826558.452500,461031.588194,5064905.977000] +PIC.Core.CS002D18.position = [3826563.447500,461043.574195,5064901.145000] +PIC.Core.CS002D19.position = [3826574.094500,461047.864195,5064892.765000] +PIC.Core.CS002D20.position = [3826581.534500,461047.956196,5064887.172000] +PIC.Core.CS002D21.position = [3826589.542500,461041.302197,5064881.763000] +PIC.Core.CS002D22.position = [3826597.988500,461028.593197,5064876.571000] +PIC.Core.CS002D23.position = [3826597.031500,461011.026197,5064878.878000] +PIC.Core.CS002D24.position = [3826596.814500,460997.283197,5064880.282000] +PIC.Core.CS002D25.position = [3826585.611500,460992.916196,5064889.087000] +PIC.Core.CS002D26.position = [3826573.967500,460992.191195,5064897.892000] +PIC.Core.CS002D27.position = [3826567.574500,460993.250195,5064902.595000] +PIC.Core.CS002D28.position = [3826556.250500,461006.503194,5064909.897000] +PIC.Core.CS002D29.position = [3826549.777500,461023.012194,5064913.263000] +PIC.Core.CS002D30.position = [3826551.123500,461035.050194,5064911.165000] +PIC.Core.CS002D31.position = [3826556.956500,461040.527194,5064906.292000] +PIC.Core.CS002D32.position = [3826562.717500,461052.851195,5064900.854000] +PIC.Core.CS002D33.position = [3826577.449500,461056.665196,5064889.451000] +PIC.Core.CS002D34.position = [3826584.539500,461054.769196,5064884.300000] +PIC.Core.CS002D35.position = [3826595.997500,461046.957197,5064876.406000] +PIC.Core.CS002D36.position = 
[3826598.671500,461039.287197,5064875.092000] +PIC.Core.CS002D37.position = [3826603.738500,461019.411198,5064873.085000] +PIC.Core.CS002D38.position = [3826600.940500,461013.887197,5064875.685000] +PIC.Core.CS002D39.position = [3826605.295500,461010.467198,5064872.725000] +PIC.Core.CS002D40.position = [3826585.073500,460981.955196,5064890.481000] +PIC.Core.CS002D41.position = [3826564.739500,460986.971195,5064905.291000] +PIC.Core.CS002D42.position = [3826551.009500,460998.299194,5064914.572000] +PIC.Core.CS002D43.position = [3826546.172500,461013.473193,5064916.832000] +PIC.Core.CS002D44.position = [3826556.131500,461055.945194,5064905.517000] +PIC.Core.CS002D45.position = [3826573.680500,461059.292195,5064892.043000] +PIC.Core.CS002D46.position = [3826586.498500,461061.863196,5064882.189000] +PIC.Core.CS002D47.position = [3826607.822500,461026.590198,5064869.371000] +PIC.Core.CS003D00.position = [3826527.881500,460940.655192,5064937.143000] +PIC.Core.CS003D01.position = [3826530.332500,460940.201192,5064935.344000] +PIC.Core.CS003D02.position = [3826532.793510,460924.878192,5064934.882000] +PIC.Core.CS003D03.position = [3826525.021510,460912.584192,5064941.827000] +PIC.Core.CS003D04.position = [3826521.701510,460910.152192,5064944.539000] +PIC.Core.CS003D05.position = [3826512.144510,460913.012191,5064951.454000] +PIC.Core.CS003D06.position = [3826504.196510,460922.342190,5064956.577000] +PIC.Core.CS003D07.position = [3826500.500500,460929.054190,5064958.744000] +PIC.Core.CS003D08.position = [3826505.334500,460941.780190,5064953.966000] +PIC.Core.CS003D09.position = [3826509.307500,460947.981191,5064950.423000] +PIC.Core.CS003D10.position = [3826517.220500,460948.636191,5064944.424000] +PIC.Core.CS003D11.position = [3826522.331500,460953.408192,5064940.156000] +PIC.Core.CS003D12.position = [3826531.295500,460946.188192,5064934.080000] +PIC.Core.CS003D13.position = [3826536.521510,460934.781193,5064931.189000] +PIC.Core.CS003D14.position = 
[3826537.298510,460928.557193,5064931.168000] +PIC.Core.CS003D15.position = [3826537.082510,460920.171193,5064932.088000] +PIC.Core.CS003D16.position = [3826526.721510,460908.930192,5064940.881000] +PIC.Core.CS003D17.position = [3826520.943510,460905.931192,5064945.489000] +PIC.Core.CS003D18.position = [3826509.863510,460905.894191,5064953.809000] +PIC.Core.CS003D19.position = [3826501.054510,460915.450190,5064959.558000] +PIC.Core.CS003D20.position = [3826496.920500,460923.443190,5064961.938000] +PIC.Core.CS003D21.position = [3826497.175500,460935.107190,5064960.693000] +PIC.Core.CS003D22.position = [3826501.408500,460949.988190,5064956.171000] +PIC.Core.CS003D23.position = [3826514.166500,460956.915191,5064945.968000] +PIC.Core.CS003D24.position = [3826523.855500,460962.908192,5064938.154000] +PIC.Core.CS003D25.position = [3826533.025500,460952.786192,5064932.186000] +PIC.Core.CS003D26.position = [3826539.897510,460940.539193,5064928.134000] +PIC.Core.CS003D27.position = [3826542.655510,460933.155193,5064926.731000] +PIC.Core.CS003D28.position = [3826539.617510,460914.920193,5064930.660000] +PIC.Core.CS003D29.position = [3826531.660510,460900.448192,5064937.941000] +PIC.Core.CS003D30.position = [3826522.539510,460896.447192,5064945.148000] +PIC.Core.CS003D31.position = [3826515.535510,460900.265191,5064950.061000] +PIC.Core.CS003D32.position = [3826503.801510,460900.903190,5064958.811000] +PIC.Core.CS003D33.position = [3826493.089510,460915.084190,5064965.570000] +PIC.Core.CS003D34.position = [3826490.532500,460923.602189,5064966.718990] +PIC.Core.CS003D35.position = [3826489.706500,460939.517189,5064965.900990] +PIC.Core.CS003D36.position = [3826493.586500,460945.880190,5064962.412990] +PIC.Core.CS003D37.position = [3826504.658500,460960.358190,5064952.794000] +PIC.Core.CS003D38.position = [3826510.036500,460959.840191,5064948.804000] +PIC.Core.CS003D39.position = [3826510.036500,460966.093191,5064948.239000] +PIC.Core.CS003D40.position = 
[3826540.952500,460957.174193,5064925.839000] +PIC.Core.CS003D41.position = [3826548.578510,460932.938194,5064922.305000] +PIC.Core.CS003D42.position = [3826548.197510,460912.976194,5064924.395000] +PIC.Core.CS003D43.position = [3826540.275510,460900.877193,5064931.436000] +PIC.Core.CS003D44.position = [3826505.248510,460892.388190,5064958.494000] +PIC.Core.CS003D45.position = [3826493.321510,460909.823190,5064965.871000] +PIC.Core.CS003D46.position = [3826484.521500,460922.503189,5064971.330990] +PIC.Core.CS003D47.position = [3826497.425500,460961.517190,5064958.119000] +PIC.Core.CS004D00.position = [3826640.767510,460946.686200,5064851.863000] +PIC.Core.CS004D01.position = [3826639.743510,460949.538200,5064852.374000] +PIC.Core.CS004D02.position = [3826649.069510,460959.139201,5064844.506000] +PIC.Core.CS004D03.position = [3826661.881510,460956.316202,5064835.144000] +PIC.Core.CS004D04.position = [3826665.390510,460953.832202,5064832.734000] +PIC.Core.CS004D05.position = [3826668.624510,460942.216202,5064831.357000] +PIC.Core.CS004D06.position = [3826666.474510,460929.403202,5064834.129000] +PIC.Core.CS004D07.position = [3826663.820510,460922.371202,5064836.757000] +PIC.Core.CS004D08.position = [3826652.313510,460921.824201,5064845.443000] +PIC.Core.CS004D09.position = [3826645.822510,460923.306201,5064850.182000] +PIC.Core.CS004D10.position = [3826641.039510,460931.555200,5064853.027000] +PIC.Core.CS004D11.position = [3826634.920510,460934.914200,5064857.316000] +PIC.Core.CS004D12.position = [3826635.046510,460947.866200,5064856.050000] +PIC.Core.CS004D13.position = [3826640.133510,460958.679200,5064851.255000] +PIC.Core.CS004D14.position = [3826644.043510,460962.338201,5064847.989000] +PIC.Core.CS004D15.position = [3826650.001510,460965.905201,5064843.194000] +PIC.Core.CS004D16.position = [3826663.496510,460959.809202,5064833.616000] +PIC.Core.CS004D17.position = [3826668.745510,460954.927202,5064830.117000] +PIC.Core.CS004D18.position = 
[3826674.830510,460942.977203,5064826.630000] +PIC.Core.CS004D19.position = [3826672.992510,460929.132203,5064829.261000] +PIC.Core.CS004D20.position = [3826669.685510,460921.047203,5064832.474000] +PIC.Core.CS004D21.position = [3826661.423510,460916.036202,5064839.129000] +PIC.Core.CS004D22.position = [3826648.744510,460913.865201,5064848.842000] +PIC.Core.CS004D23.position = [3826636.943510,460924.505200,5064856.738000] +PIC.Core.CS004D24.position = [3826627.470510,460932.254199,5064863.148000] +PIC.Core.CS004D25.position = [3826629.505510,460946.744200,5064860.311000] +PIC.Core.CS004D26.position = [3826634.276510,460959.716200,5064855.557000] +PIC.Core.CS004D27.position = [3826637.911510,460966.041200,5064852.258000] +PIC.Core.CS004D28.position = [3826652.272510,460971.023201,5064841.027000] +PIC.Core.CS004D29.position = [3826666.703510,460968.985202,5064830.380000] +PIC.Core.CS004D30.position = [3826674.477510,460960.948203,5064825.271000] +PIC.Core.CS004D31.position = [3826675.648510,460951.654203,5064825.232000] +PIC.Core.CS004D32.position = [3826681.621510,460938.692203,5064821.920000] +PIC.Core.CS004D33.position = [3826677.602510,460920.696203,5064826.563000] +PIC.Core.CS004D34.position = [3826673.068510,460914.075203,5064830.565000] +PIC.Core.CS004D35.position = [3826662.436510,460905.971202,5064839.278000] +PIC.Core.CS004D36.position = [3826655.882510,460907.279202,5064844.079000] +PIC.Core.CS004D37.position = [3826639.744510,460912.676200,5064855.705000] +PIC.Core.CS004D38.position = [3826637.164510,460918.719200,5064857.095000] +PIC.Core.CS004D39.position = [3826632.809510,460915.885200,5064860.620000] +PIC.Core.CS004D40.position = [3826622.115510,460953.317199,5064865.264000] +PIC.Core.CS004D41.position = [3826634.822510,460972.536200,5064853.989000] +PIC.Core.CS004D42.position = [3826648.933510,460981.170201,5064842.617000] +PIC.Core.CS004D43.position = [3826661.693510,460978.096202,5064833.316000] +PIC.Core.CS004D44.position = 
[3826686.760510,460944.113204,5064817.572000] +PIC.Core.CS004D45.position = [3826681.140510,460923.330203,5064823.669000] +PIC.Core.CS004D46.position = [3826677.121510,460908.080203,5064828.064000] +PIC.Core.CS004D47.position = [3826642.893510,460904.339201,5064854.095000] +PIC.Core.CS005D00.position = [3826683.049500,461070.888203,5064808.900000] +PIC.Core.CS005D01.position = [3826685.005500,461068.896204,5064807.611000] +PIC.Core.CS005D02.position = [3826680.632500,461054.715203,5064812.176000] +PIC.Core.CS005D03.position = [3826668.579500,461049.779202,5064821.668000] +PIC.Core.CS005D04.position = [3826664.631500,461049.979202,5064824.614000] +PIC.Core.CS005D05.position = [3826657.472500,461058.639202,5064829.205000] +PIC.Core.CS005D06.position = [3826654.480500,461071.536201,5064830.285000] +PIC.Core.CS005D07.position = [3826654.101500,461079.478201,5064829.852000] +PIC.Core.CS005D08.position = [3826663.761500,461086.710202,5064821.947000] +PIC.Core.CS005D09.position = [3826669.887500,461089.165202,5064817.127000] +PIC.Core.CS005D10.position = [3826677.104500,461084.473203,5064812.134000] +PIC.Core.CS005D11.position = [3826683.619500,461085.005203,5064807.196000] +PIC.Core.CS005D12.position = [3826688.400500,461073.167204,5064804.677000] +PIC.Core.CS005D13.position = [3826688.121500,461060.365204,5064806.043000] +PIC.Core.CS005D14.position = [3826686.149500,461054.752204,5064808.031000] +PIC.Core.CS005D15.position = [3826682.387500,461048.024203,5064811.463000] +PIC.Core.CS005D16.position = [3826668.512500,461045.661202,5064822.091000] +PIC.Core.CS005D17.position = [3826662.169500,461047.021202,5064826.729000] +PIC.Core.CS005D18.position = [3826652.439500,461054.314201,5064833.374000] +PIC.Core.CS005D19.position = [3826648.787500,461067.965201,5064834.881000] +PIC.Core.CS005D20.position = [3826648.570500,461077.246201,5064834.205000] +PIC.Core.CS005D21.position = [3826653.765500,461086.635201,5064829.457000] +PIC.Core.CS005D22.position = 
[3826663.817500,461096.031202,5064821.063000] +PIC.Core.CS005D23.position = [3826677.955500,461093.274203,5064810.700000] +PIC.Core.CS005D24.position = [3826689.003500,461091.782204,5064802.541000] +PIC.Core.CS005D25.position = [3826692.729500,461077.430204,5064801.042000] +PIC.Core.CS005D26.position = [3826693.534500,461062.853204,5064801.755000] +PIC.Core.CS005D27.position = [3826692.806500,461054.979204,5064803.014000] +PIC.Core.CS005D28.position = [3826682.372500,461042.045203,5064812.015000] +PIC.Core.CS005D29.position = [3826669.227500,461035.447202,5064822.477000] +PIC.Core.CS005D30.position = [3826659.527500,461038.196202,5064829.511000] +PIC.Core.CS005D31.position = [3826655.013500,461045.953201,5064832.198000] +PIC.Core.CS005D32.position = [3826644.998500,461054.230201,5064838.967000] +PIC.Core.CS005D33.position = [3826641.649500,461072.931200,5064839.790000] +PIC.Core.CS005D34.position = [3826643.037500,461081.599200,5064837.965000] +PIC.Core.CS005D35.position = [3826649.096500,461095.184201,5064832.189000] +PIC.Core.CS005D36.position = [3826655.209500,461097.834201,5064827.361000] +PIC.Core.CS005D37.position = [3826671.086500,461102.380203,5064815.033000] +PIC.Core.CS005D38.position = [3826675.580500,461098.401203,5064812.019000] +PIC.Core.CS005D39.position = [3826678.245500,461103.525203,5064809.556000] +PIC.Core.CS005D40.position = [3826701.548500,461075.785205,5064794.571000] +PIC.Core.CS005D41.position = [3826697.907500,461050.887205,5064799.555000] +PIC.Core.CS005D42.position = [3826689.066500,461034.784204,5064807.646000] +PIC.Core.CS005D43.position = [3826676.963500,461030.104203,5064817.154000] +PIC.Core.CS005D44.position = [3826642.637500,461046.296200,5064841.456000] +PIC.Core.CS005D45.position = [3826639.610500,461068.466200,5064841.724000] +PIC.Core.CS005D46.position = [3826637.298500,461084.670200,5064841.995000] +PIC.Core.CS005D47.position = [3826665.238500,461108.110202,5064818.904000] +PIC.Core.CS006D00.position = 
[3826588.011490,461159.899196,5064872.192000] +PIC.Core.CS006D01.position = [3826588.329490,461162.913196,5064871.681000] +PIC.Core.CS006D02.position = [3826600.597490,461164.616197,5064862.319000] +PIC.Core.CS006D03.position = [3826610.626490,461153.837198,5064855.765000] +PIC.Core.CS006D04.position = [3826612.645490,461149.482198,5064854.643000] +PIC.Core.CS006D05.position = [3826610.530490,461137.826198,5064857.284000] +PIC.Core.CS006D06.position = [3826603.184490,461128.748198,5064863.619000] +PIC.Core.CS006D07.position = [3826597.860490,461124.740197,5064867.977000] +PIC.Core.CS006D08.position = [3826587.540490,461131.897196,5064875.077000] +PIC.Core.CS006D09.position = [3826582.480490,461137.401196,5064878.377000] +PIC.Core.CS006D10.position = [3826581.802490,461147.323196,5064877.990000] +PIC.Core.CS006D11.position = [3826577.868490,461154.117196,5064880.328000] +PIC.Core.CS006D12.position = [3826583.498490,461164.646196,5064875.150000] +PIC.Core.CS006D13.position = [3826592.566490,461170.144197,5064867.847000] +PIC.Core.CS006D14.position = [3826597.553490,461170.559197,5064864.066000] +PIC.Core.CS006D15.position = [3826604.298490,461169.544198,5064859.095000] +PIC.Core.CS006D16.position = [3826613.531490,461155.630198,5064853.422000] +PIC.Core.CS006D17.position = [3826616.052490,461148.162198,5064852.205000] +PIC.Core.CS006D18.position = [3826616.294490,461134.350198,5064853.271000] +PIC.Core.CS006D19.position = [3826608.784490,461124.219198,5064859.825000] +PIC.Core.CS006D20.position = [3826602.439490,461119.780197,5064864.989000] +PIC.Core.CS006D21.position = [3826593.059490,461121.135197,5064871.907000] +PIC.Core.CS006D22.position = [3826581.018490,461127.734196,5064880.348000] +PIC.Core.CS006D23.position = [3826575.206490,461144.252195,5064883.218000] +PIC.Core.CS006D24.position = [3826570.204490,461156.862195,5064885.833000] +PIC.Core.CS006D25.position = [3826578.162490,461167.388196,5064878.908000] +PIC.Core.CS006D26.position = 
[3826587.873490,461174.865196,5064870.943000] +PIC.Core.CS006D27.position = [3826593.755490,461177.645197,5064866.276000] +PIC.Core.CS006D28.position = [3826608.469490,461172.237198,5064855.721000] +PIC.Core.CS006D29.position = [3826620.253490,461161.031199,5064847.889000] +PIC.Core.CS006D30.position = [3826623.644490,461149.308199,5064846.403000] +PIC.Core.CS006D31.position = [3826620.710490,461140.918199,5064849.364000] +PIC.Core.CS006D32.position = [3826620.422490,461126.350199,5064850.896000] +PIC.Core.CS006D33.position = [3826609.230500,461114.260198,5064860.390000] +PIC.Core.CS006D34.position = [3826602.433500,461111.832197,5064865.712000] +PIC.Core.CS006D35.position = [3826589.658490,461112.218197,5064875.266000] +PIC.Core.CS006D36.position = [3826584.469490,461117.621196,5064878.672000] +PIC.Core.CS006D37.position = [3826572.621490,461132.708195,5064886.202000] +PIC.Core.CS006D38.position = [3826572.934490,461139.365195,5064885.365000] +PIC.Core.CS006D39.position = [3826567.908490,461139.921195,5064889.087000] +PIC.Core.CS006D40.position = [3826574.484490,461177.659195,5064880.740000] +PIC.Core.CS006D41.position = [3826593.816490,461185.008197,5064865.565000] +PIC.Core.CS006D42.position = [3826609.866490,461182.757198,5064853.721000] +PIC.Core.CS006D43.position = [3826619.744490,461171.806199,5064847.297000] +PIC.Core.CS006D44.position = [3826627.238500,461127.396199,5064845.686000] +PIC.Core.CS006D45.position = [3826613.453500,461114.081198,5064857.236000] +PIC.Core.CS006D46.position = [3826603.432500,461104.241198,5064865.647000] +PIC.Core.CS006D47.position = [3826571.828490,461123.796195,5064887.603000] +PIC.Core.CS007D00.position = [3826527.909490,461082.763192,5064924.277000] +PIC.Core.CS007D01.position = [3826525.636490,461081.740192,5064926.075000] +PIC.Core.CS007D02.position = [3826517.741490,461094.218191,5064930.874000] +PIC.Core.CS007D03.position = [3826519.764490,461109.935191,5064927.935000] +PIC.Core.CS007D04.position = 
[3826521.693490,461114.089192,5064926.111000] +PIC.Core.CS007D05.position = [3826530.968490,461117.085192,5064918.878000] +PIC.Core.CS007D06.position = [3826541.306490,461113.264193,5064911.463000] +PIC.Core.CS007D07.position = [3826547.009490,461109.331193,5064907.538000] +PIC.Core.CS007D08.position = [3826547.669490,461094.942193,5064908.344000] +PIC.Core.CS007D09.position = [3826546.603490,461086.983193,5064909.863000] +PIC.Core.CS007D10.position = [3826540.064500,461081.754193,5064915.244000] +PIC.Core.CS007D11.position = [3826537.483500,461074.428193,5064917.844000] +PIC.Core.CS007D12.position = [3826527.071490,461075.738192,5064925.540000] +PIC.Core.CS007D13.position = [3826518.282490,461083.039191,5064931.477000] +PIC.Core.CS007D14.position = [3826515.266490,461088.238191,5064933.272000] +PIC.Core.CS007D15.position = [3826512.285490,461095.982191,5064934.809000] +PIC.Core.CS007D16.position = [3826516.926490,461112.259191,5064929.854000] +PIC.Core.CS007D17.position = [3826520.749490,461118.367191,5064926.433000] +PIC.Core.CS007D18.position = [3826530.236490,461124.886192,5064918.722000] +PIC.Core.CS007D19.position = [3826541.398490,461121.365193,5064910.662000] +PIC.Core.CS007D20.position = [3826547.961490,461116.524193,5064906.174000] +PIC.Core.CS007D21.position = [3826552.146490,461105.780194,5064904.004000] +PIC.Core.CS007D22.position = [3826554.135490,461089.784194,5064903.957000] +PIC.Core.CS007D23.position = [3826545.809500,461076.022193,5064911.449000] +PIC.Core.CS007D24.position = [3826539.762500,461064.905193,5064916.993000] +PIC.Core.CS007D25.position = [3826528.079500,461068.731192,5064925.417000] +PIC.Core.CS007D26.position = [3826517.561490,461075.832191,5064932.670000] +PIC.Core.CS007D27.position = [3826512.409490,461080.924191,5064936.078000] +PIC.Core.CS007D28.position = [3826508.128490,461099.268191,5064937.631990] +PIC.Core.CS007D29.position = [3826509.489490,461117.072191,5064935.001990] +PIC.Core.CS007D30.position = 
[3826515.799490,461126.046191,5064929.454000] +PIC.Core.CS007D31.position = [3826523.247490,461126.679192,5064923.806000] +PIC.Core.CS007D32.position = [3826533.550490,461132.970192,5064915.505000] +PIC.Core.CS007D33.position = [3826548.091490,461126.360193,5064905.187000] +PIC.Core.CS007D34.position = [3826553.499490,461120.120194,5064901.692000] +PIC.Core.CS007D35.position = [3826560.216490,461106.147194,5064897.913000] +PIC.Core.CS007D36.position = [3826559.291490,461098.095194,5064899.335000] +PIC.Core.CS007D37.position = [3826555.263500,461078.462194,5064904.133000] +PIC.Core.CS007D38.position = [3826550.456500,461075.783194,5064907.984000] +PIC.Core.CS007D39.position = [3826552.817500,461070.104194,5064906.725000] +PIC.Core.CS007D40.position = [3826522.937500,461060.105192,5064930.056000] +PIC.Core.CS007D41.position = [3826507.247490,461077.654191,5064940.246990] +PIC.Core.CS007D42.position = [3826500.037490,461096.010190,5064943.999990] +PIC.Core.CS007D43.position = [3826502.263490,461111.639190,5064940.916990] +PIC.Core.CS007D44.position = [3826529.095490,461139.857192,5064918.226000] +PIC.Core.CS007D45.position = [3826545.906490,461131.003193,5064906.408000] +PIC.Core.CS007D46.position = [3826558.240490,461124.638194,5064897.725000] +PIC.Core.CS007D47.position = [3826561.903500,461081.644195,5064898.861000] diff --git a/RTCP/GPUProc/test/CMakeLists.txt b/RTCP/GPUProc/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..506aa37355350e6771ab94d24ed6c876cc94aaf6 --- /dev/null +++ b/RTCP/GPUProc/test/CMakeLists.txt @@ -0,0 +1,8 @@ +# $Id: CMakeLists.txt 13414 2009-06-16 22:15:37Z loose $ + +include(LofarCTest) + +# Add project's source directory to -I path. 
+include_directories(${PACKAGE_SOURCE_DIR}/src) + +#lofar_add_test(tDelayCompensation tDelayCompensation.cc) diff --git a/RTCP/GPUProc/test/small-test.parset b/RTCP/GPUProc/test/small-test.parset new file mode 100644 index 0000000000000000000000000000000000000000..df4038da21ab8c91469fd42da18c0d879c2e8ef1 --- /dev/null +++ b/RTCP/GPUProc/test/small-test.parset @@ -0,0 +1,16 @@ +OLAP.nrBitsPerSample = 8 +Observation.startTime = '2010-04-28 17:25:03' +Observation.stopTime = '2010-04-28 17:25:13' +OLAP.storageStationNames = [st00 .. st7] +Observation.subbandList = [300 .. 307] +Observation.bandFilter = LBA_30_90 +Observation.channelsPerSubband = 64 +OLAP.CNProc.integrationSteps = 3072 # FIXME: rename +Observation.sampleClock = 200 +OLAP.correctBandPass = true +Observation.beamList = [8 * 0] +OLAP.delayCompensation = true +Observation.Beam[0].nrTiedArrayBeams = 1 # FIXME: conditional +OLAP.CNProc_CoherentStokes.timeIntegrationFactor = 1 # FIXME: conditional +OLAP.CNProc_IncoherentStokes.timeIntegrationFactor = 1 # FIXME: conditional +OLAP.tiedArrayStationNames = [st00 .. st7] # FIXME???: conditional