diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 10b2ab9132c3ebecdc1c24611edf0c8d64ce390e..904d81afecd89cdf1cc4de5f233a0012fb84022e 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -11,10 +11,11 @@ build: - das6 script: - source scripts/load-modules.sh - - source scripts/build-cudawrappers.sh + - mkdir build && cd build + - cmake .. - make -j -test-example: +test: stage: testing tags: - das6 @@ -22,21 +23,7 @@ test-example: - build script: - source scripts/load-modules.sh - - source scripts/build-cudawrappers.sh + - mkdir build && cd build + - cmake .. -DBUILD_TESTING=On - make -j - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc - - build-$(arch)/test/SimpleExample/SimpleExample - -test-correlator: - stage: testing - tags: - - das6 - dependencies: - - build - script: - - source scripts/load-modules.sh - - source scripts/build-cudawrappers.sh - - make -j - - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc - - build-$(arch)/test/CorrelatorTest/CorrelatorTest -n 20 - + - make test diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 5c2de547ee7275e615821173bf63de8982dd7207..0000000000000000000000000000000000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "external/cuda-wrappers"] - path = external/cuda-wrappers - url = https://github.com/nlesc-recruit/CUDA-wrappers.git diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1906bf3706cbf187fb2040933db8da4ef90598dc --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,59 @@ +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) + +project( + libtcc + DESCRIPTION "Tensor-Core Correlator" + VERSION 0.5 + HOMEPAGE_URL https://git.astron.nl/RD/tensor-core-correlator + LANGUAGES CXX CUDA +) + +set(CMAKE_CXX_STANDARD 17) +option(BUILD_SHARED_LIBS "Create shared libraries" True) +option(BUILD_TESTING "Build tests" False) + +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE + "Release" + CACHE STRING "CMake build type" FORCE + ) 
+ set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release") +endif() + +find_package(CUDAToolkit REQUIRED) + +# Define CMAKE_INSTALL_LIBDIR / CMAKE_INSTALL_INCLUDEDIR used by the install rules here and in libtcc/ +include(GNUInstallDirs) + +include(FetchContent) +FetchContent_Declare( + cudawrappers + GIT_REPOSITORY https://github.com/nlesc-recruit/cudawrappers + GIT_TAG 0.6.0 +) +FetchContent_MakeAvailable(cudawrappers) + +# Set up libtcc +add_subdirectory(libtcc) + +# Set up tests +include(CTest) +if(BUILD_TESTING) + add_subdirectory(test) +endif() + +# Install project cmake targets +include(CMakePackageConfigHelpers) +write_basic_package_version_file( + ${PROJECT_NAME}-config-version.cmake + VERSION ${PROJECT_VERSION} #[[ libtcc's own version, not that of the cudawrappers dependency ]] + COMPATIBILITY AnyNewerVersion +) +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} +) + +# --- auto-ignore build directory +if(NOT EXISTS ${PROJECT_BINARY_DIR}/.gitignore) + file(WRITE ${PROJECT_BINARY_DIR}/.gitignore "*") +endif() diff --git a/Makefile b/Makefile deleted file mode 100644 index c062e208ae6df0e979d6730b9e1b1da3c3cd0f18..0000000000000000000000000000000000000000 --- a/Makefile +++ /dev/null @@ -1,117 +0,0 @@ -VERSION= 0.8 -CUDA= $(shell dirname `dirname \`which nvcc\``) -#CUDA= /usr/local/cuda -CUDA_INCLUDE= $(shell dirname `find $(CUDA)/ -name cuda.h`) -CUDA_LIBDIR= $(shell dirname `find $(CUDA)/ -name libcuda.so`|head -n1) -NVRTC_LIBDIR= $(shell dirname `find $(CUDA)/ -name libnvrtc.so`|tail -n1) -#POWER_SENSOR= $(HOME)/projects/PowerSensor3 -ARCH= $(shell arch) -CC= gcc -CXX= g++ #-Wno-deprecated-declarations -NVCC= nvcc -INCLUDES= -I. 
-CUDAWRAPPERS_LIBDIR=external/cuda-wrappers/build-${ARCH} -CUDAWRAPPERS_INCLUDE=external/cuda-wrappers/include -CUDAWRAPPERSFLAGS= -Xlinker -rpath=${CUDAWRAPPERS_LIBDIR} ${CUDAWRAPPERS_LIBDIR}/cudawrappers-cu.so ${CUDAWRAPPERS_LIBDIR}/cudawrappers-nvrtc.so -INCLUDES+= -I$(CUDA_INCLUDE) -I${CUDAWRAPPERS_INCLUDE} -#INCLUDES+= -I$(CUDA_INCLUDE) -I$(NVRTC_INCLUDE) -#INCLUDES+= -I$(POWER_SENSOR)/host/include -CXXFLAGS+= -std=c++17 -O3 -g -fpic -fopenmp $(INCLUDES) -DNDEBUG -NVCCFLAGS= -std c++14 $(INCLUDES) - -#CXXFLAGS+= -march=core-avx2 -mcmodel=medium - -BUILD_DIR= build-$(ARCH) -BUILD_SUB_DIRS= $(BUILD_DIR) $(BUILD_DIR)/libtcc $(BUILD_DIR)/test $(BUILD_DIR)/test/Common $(BUILD_DIR)/test/SimpleExample $(BUILD_DIR)/test/CorrelatorTest $(BUILD_DIR)/test/OpenCLCorrelatorTest - -LIBTCC_SOURCES= libtcc/CorrelatorKernel.cc\ - libtcc/Correlator.cc\ - libtcc/Kernel.cc - - -CORRELATOR_TEST_SOURCES=test/CorrelatorTest/CorrelatorTest.cc\ - test/CorrelatorTest/Options.cc\ - test/Common/Record.cc\ - test/Common/UnitTest.cc - -OPENCL_TEST_SOURCES= test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc - -SIMPLE_EXAMPLE_SOURCES= test/SimpleExample/SimpleExample.cu - - -LIBTCC_OBJECTS= $(LIBTCC_SOURCES:%.cc=$(BUILD_DIR)/%.o) $(BUILD_DIR)/libtcc/TCCorrelator.o -SIMPLE_EXAMPLE_OBJECTS= $(SIMPLE_EXAMPLE_SOURCES:%.cu=$(BUILD_DIR)/%.o) -CORRELATOR_TEST_OBJECTS=$(CORRELATOR_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o) -OPENCL_TEST_OBJECTS= $(OPENCL_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o) - -OBJECTS= $(LIBTCC_OBJECTS)\ - $(SIMPLE_EXAMPLE_OBJECTS)\ - $(CORRELATOR_TEST_OBJECTS)\ - $(OPENCL_TEST_OBJECTS) - -SHARED_OBJECTS= $(BUILD_DIR)/libtcc/libtcc.so $(BUILD_DIR)/libtcc/libtcc.so.$(VERSION) - -DEPENDENCIES= $(OBJECTS:%.o=%.d) - -EXECUTABLES= $(BUILD_DIR)/test/SimpleExample/SimpleExample\ - $(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest - -ifneq ("$(wildcard $(CUDA_INCLUDE)/CL/cl.hpp)", "") -EXECUTABLES+= $(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest -endif - -LIBRARIES= -L$(CUDA_LIBDIR) -lcuda 
-LIBRARIES+= -L$(NVRTC_LIBDIR) -lnvrtc -Xlinker -rpath=$(NVRTC_LIBDIR) -#LIBRARIES+= -L$(POWER_SENSOR)/build-$(ARCH)/host -lPowerSensor -lnvidia-ml - - -$(BUILD_DIR)/%.d: %.cc - -$(CXX) $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@ - -$(BUILD_DIR)/%.d: %.cu - -$(CXX) -x c++ $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@ - -$(BUILD_DIR)/%.o: %.cc - $(CXX) $(CXXFLAGS) -o $@ -c $< - -$(BUILD_DIR)/%.o: %.cu - $(NVCC) $(NVCCFLAGS) -o $@ -c $< - -$(BUILD_DIR)/%.s: %.cc - $(CXX) $(CXXFLAGS) -o $@ -S $< - -$(BUILD_DIR)/%.so: $(BUILD_DIR)/%.so.$(VERSION) - rm -f $@ - ln -s $(@F).$(VERSION) $@ - -all:: $(EXECUTABLES) - -clean:: - rm -rf $(BUILD_DIR) - -$(OBJECTS) $(SHARED_OBJECTS) $(DEPENDENCIES) $(EXECUTABLES): $(BUILD_DIR) - -$(BUILD_DIR): - mkdir -p $(BUILD_SUB_DIRS) - -$(BUILD_DIR)/libtcc/TCCorrelator.o: libtcc/TCCorrelator.cu # CUDA code embedded in object file - ld -r -b binary -o $@ $< - -$(BUILD_DIR)/libtcc/TCCorrelator.d: - - - -$(BUILD_DIR)/libtcc/libtcc.so.$(VERSION): $(LIBTCC_OBJECTS) - $(CXX) -shared -o $@ $(LIBTCC_OBJECTS) $(LIBRARIES) - -$(BUILD_DIR)/test/SimpleExample/SimpleExample: $(SIMPLE_EXAMPLE_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so - $(NVCC) $(NVCCFLAGS) -o $@ $(SIMPLE_EXAMPLE_OBJECTS) -Xlinker -rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS} - -$(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest: $(CORRELATOR_TEST_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so - $(CXX) $(CXXFLAGS) -o $@ $(CORRELATOR_TEST_OBJECTS) -Wl,-rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS} - -$(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest: $(OPENCL_TEST_OBJECTS) - $(CXX) $(CXXFLAGS) -o $@ $(OPENCL_TEST_OBJECTS) -L$(CUDA)/lib64 -lOpenCL - -ifeq (0, $(words $(findstring $(MAKECMDGOALS), clean))) --include $(DEPENDENCIES) -endif diff --git a/README.md b/README.md index d6cd33ca447b611eb786d246794d0713be0e512d..a1ee014fe75b6d980bdec002227e6d7fb0660a59 100644 --- 
a/README.md +++ b/README.md @@ -11,20 +11,7 @@ _Astronomy and Astrophysics_, 656(A32), pages 1-4, December 2021). ## Brief overview on how to use the Tensor-Core Correlator library: -Clone the repository (`git clone --recursive`) - -Build [cudawrappers](https://github.com/nlesc-recruit/cudawrappers): -``` -cd external/cuda-wrappers -mkdir build-$(arch) -cd build-$(arch) -cmake -DCMAKE_INSTALL_PREFIX=$(pwd) .. -make install -cd ../../.. -``` -In a later release, cudawrappers wil be header-only and this step will no longer be needed. - -Build the library (just type `make`) +Clone and build the repository, see below. Include `libtcc/Correlator.h`, and link with `libtcc/libtcc.so`. Create a `tcc::Correlator` object with the number of receivers, channels, etc. @@ -76,4 +63,52 @@ Limitations: - the amount of samples over which is integrated) must be a multiple of 128 / `NR_BITS` (i.e., 32, 16, or 8 for 4-bit, 8-bit, or 16-bit input, respectively). +## Building, testing, and installation +Clone the repository: +```bash +git clone https://git.astron.nl/RD/tensor-core-correlator.git +``` + +To build and install the project, run: +```bash +cmake -S . -B build +make -C build +make -C build install +``` + +To install in a custom location, e.g. `~/.local`, run: +```bash +cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local +make -C build +make -C build install +``` + +To compile and run the tests, run: +```bash +cmake -S . -B build -DBUILD_TESTING=ON +make -C build +make -C build test +``` +The tests require a GPU. +On the DAS-6/ASTRON cluster you can request a GPU node and run the tests with the command: +```bash +srun -N 1 --gres=gpu:A4000:1 make -C build test +``` + +Note that in the command above a node with a NVIDIA A4000 GPU is requested, because the tests require a GPU that has tensor cores. + +## Example usage +The `example` subdirectory has a minimal example that demonstrates how this +library can be integrated in another project. 
This example assumes that you +pre-installed both this library (`libtcc`) and `cudawrappers`. E.g. when +`libtcc` is installed in `<prefix>/libtcc` and `cudawrappers` is installed in +`<prefix>/cudawrappers`, you can build the example by running: +```bash +cmake . -DCMAKE_PREFIX_PATH="<prefix>/cudawrappers;<prefix>/libtcc" +make +./example +``` + + +## Bugs/feedback Contact John Romein (romein@astron.nl) to report bugs/feedback diff --git a/cmake/tensor-core-correlator-config.cmake.in b/cmake/tensor-core-correlator-config.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..f670102752c3e4a91da1e313572642831ffeb9da --- /dev/null +++ b/cmake/tensor-core-correlator-config.cmake.in @@ -0,0 +1,7 @@ +include(CMakeFindDependencyMacro) + +find_dependency(CUDAToolkit @CUDA_MIN_VERSION@) #[[ forwards REQUIRED/QUIET from the consumer's find_package call ]] + +foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS}) + include(${CMAKE_CURRENT_LIST_DIR}/${component}-config.cmake) +endforeach() diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..cdeba691a11e65b79417db4bc9f2cec7cb5a5f99 --- /dev/null +++ b/example/CMakeLists.txt @@ -0,0 +1,9 @@ +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) #[[ must precede project() so policies are set first ]] +project(example) + +find_package(CUDAToolkit REQUIRED) +find_package(cudawrappers REQUIRED) +find_package(libtcc REQUIRED) + +add_executable(${PROJECT_NAME} example.cpp) +target_link_libraries(${PROJECT_NAME} PRIVATE tcc cudawrappers::cu cudawrappers::nvrtc) diff --git a/example/example.cpp b/example/example.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49d5b3b174a923bf9bd1c437f0bc55bc1e6f5895 --- /dev/null +++ b/example/example.cpp @@ -0,0 +1,19 @@ +#include <cudawrappers/cu.hpp> +#include <libtcc/Correlator.h> + +#define NR_BITS 8 +#define NR_CHANNELS 480 +#define NR_POLARIZATIONS 2 +#define NR_SAMPLES_PER_CHANNEL 3072 +#define NR_RECEIVERS 576 +#define NR_RECEIVERS_PER_BLOCK 64 + +int main(int argc, char *argv[]) { + cu::init(); + 
cu::Device device(0); + cu::Context context(0, device); + context.setCurrent(); + tcc::Correlator correlator(NR_BITS, NR_RECEIVERS, NR_CHANNELS, + NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS, + NR_RECEIVERS_PER_BLOCK); +} diff --git a/external/cuda-wrappers b/external/cuda-wrappers deleted file mode 160000 index d7f133c7e10c238d4bcceb2219f658eec64e9d7f..0000000000000000000000000000000000000000 --- a/external/cuda-wrappers +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d7f133c7e10c238d4bcceb2219f658eec64e9d7f diff --git a/libtcc/CMakeLists.txt b/libtcc/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..f28f6335c190b83a1c00216821873e2c559b7173 --- /dev/null +++ b/libtcc/CMakeLists.txt @@ -0,0 +1,27 @@ +# Create tcc library +add_library(tcc) +# Add source files +target_sources(tcc PRIVATE Correlator.cc CorrelatorKernel.cc Kernel.cc) +# Add public headers +set_target_properties( + tcc PROPERTIES PUBLIC_HEADER "Correlator.h;CorrelatorKernel.h;Kernel.h" +) +# Add includes (PROJECT_SOURCE_DIR keeps the build-tree include root correct when libtcc is built as a subproject) +target_include_directories( + tcc PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}> + $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}> +) +# Add links +target_link_libraries(tcc PRIVATE cudawrappers::cu cudawrappers::nvrtc) +target_embed_source(tcc kernel/TCCorrelator.cu) +# Install libraries and headers +install( + TARGETS tcc + EXPORT ${PROJECT_NAME}-config # export tcc cmake targets + COMPONENT tcc + PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME} +) +# Install tcc cmake targets +install(EXPORT ${PROJECT_NAME}-config + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} +) diff --git a/libtcc/Correlator.cc b/libtcc/Correlator.cc index 14d54eef021df399c5032b6185f711e6a1fe56f5..a27af39c58211fdb792b5ae4a37a6efe8b7e2c13 100644 --- a/libtcc/Correlator.cc +++ b/libtcc/Correlator.cc @@ -7,7 +7,7 @@ #include <link.h> -extern const char _binary_libtcc_TCCorrelator_cu_start, _binary_libtcc_TCCorrelator_cu_end; +extern const char 
_binary_kernel_TCCorrelator_cu_start, _binary_kernel_TCCorrelator_cu_end; namespace tcc { @@ -56,7 +56,7 @@ cu::Module Correlator::compileModule(unsigned nrBits, const std::string &customStoreVisibility ) { - cu::Device device(cu::Context::getCurrent().getDevice()); + cu::Device device(0); /* NOTE(review): this replaces cu::Context::getCurrent().getDevice(), so the compute capability is now always read from device 0 -- confirm this is intended when the current context lives on another device (CorrelatorTest accepts a deviceNumber option) */ int capability = 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>(); std::vector<std::string> options = @@ -78,15 +78,8 @@ cu::Module Correlator::compileModule(unsigned nrBits, //std::for_each(options.begin(), options.end(), [] (const std::string &e) { std::cout << e << ' '; }); std::cout << std::endl; -#if 0 - nvrtc::Program program("tcc/TCCorrelator.cu"); -#else - // embed the CUDA source code in libtcc.so, so that it need not be installed separately - // for runtime compilation - // copy into std::string for '\0' termination - std::string source(&_binary_libtcc_TCCorrelator_cu_start, &_binary_libtcc_TCCorrelator_cu_end); + const std::string source(&_binary_kernel_TCCorrelator_cu_start, &_binary_kernel_TCCorrelator_cu_end); /* the embedded kernel source is copied into a std::string so that it is '\0'-terminated */ nvrtc::Program program(source, "TCCorrelator.cu"); -#endif try { program.compile(options); diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h index df733d54bcf7e07d68d8cb8dd01f273550063968..a26f56e5d81039f875c42a2d0947023b773306b3 100644 --- a/libtcc/Correlator.h +++ b/libtcc/Correlator.h @@ -1,12 +1,12 @@ #if !defined TCC_CORRELATOR_H #define TCC_CORRELATOR_H -#include "libtcc/CorrelatorKernel.h" +#include <string> + #include <cudawrappers/cu.hpp> #include <cudawrappers/nvrtc.hpp> -#include <string> - +#include "libtcc/CorrelatorKernel.h" namespace tcc { class Correlator { diff --git a/libtcc/Kernel.h b/libtcc/Kernel.h index b9a1bf4045ec7c938220b6ba379be9434cdc343f..1663577f9796d56d02c25e29dcea2abcb127e302 100644 --- a/libtcc/Kernel.h +++ b/libtcc/Kernel.h @@ -5,7 +5,6 @@ #include <stdint.h> - namespace tcc { class Kernel { diff --git a/libtcc/TCCorrelator.cu 
b/libtcc/kernel/TCCorrelator.cu similarity index 100% rename from libtcc/TCCorrelator.cu rename to libtcc/kernel/TCCorrelator.cu diff --git a/scripts/build-cudawrappers.sh b/scripts/build-cudawrappers.sh deleted file mode 100644 index 8b9131eb52789ed4b8f4e1131459e466b75935ff..0000000000000000000000000000000000000000 --- a/scripts/build-cudawrappers.sh +++ /dev/null @@ -1,6 +0,0 @@ -cd external/cuda-wrappers -mkdir build-$(arch) -cd build-$(arch) -cmake -DCMAKE_INSTALL_PREFIX=$(pwd) .. -make install -cd ../../.. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2ca29168a1b4ad4a5941186e493a3ae5dcd44a21 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(Common) +add_subdirectory(CorrelatorTest) +add_subdirectory(OpenCLCorrelatorTest) +add_subdirectory(SimpleExample) diff --git a/test/Common/CMakeLists.txt b/test/Common/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0e6e222c9614cbd674199faa4c9828f7e759e10d --- /dev/null +++ b/test/Common/CMakeLists.txt @@ -0,0 +1,6 @@ +foreach(component Record UnitTest) + add_library(${component}) + target_sources(${component} PRIVATE ${component}.cc) + target_include_directories(${component} PRIVATE ${PROJECT_SOURCE_DIR}) #[[ root of this project, also when it is built as a subproject ]] + target_link_libraries(${component} PUBLIC cudawrappers::cu) +endforeach() diff --git a/test/Common/Record.h b/test/Common/Record.h index 8ef3b0f6d4408a816d79ba4afdd52d5230de27df..439d15da7d778ce1238dad562ccc01757d881e0d 100644 --- a/test/Common/Record.h +++ b/test/Common/Record.h @@ -4,7 +4,6 @@ #include "test/Common/Config.h" #include <cudawrappers/cu.hpp> - #if defined MEASURE_POWER #include <powersensor/NVMLPowerSensor.h> #endif diff --git a/test/Common/UnitTest.h b/test/Common/UnitTest.h index cb28715fc216a91f13532cb4df7446d35ae486f7..e0bec26eebd317753033f7f68fa72263c93ebce5 100644 --- a/test/Common/UnitTest.h +++ b/test/Common/UnitTest.h @@ -1,8 +1,8 @@ #if !defined UNIT_TEST_H 
#define UNIT_TEST_H -#include "test/Common/Record.h" #include <cudawrappers/cu.hpp> +#include "test/Common/Record.h" #if defined MEASURE_POWER #include <powersensor/NVMLPowerSensor.h> diff --git a/test/CorrelatorTest/CMakeLists.txt b/test/CorrelatorTest/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..89098ba5b489e07e5bdf7ac96486fbe629625a95 --- /dev/null +++ b/test/CorrelatorTest/CMakeLists.txt @@ -0,0 +1,45 @@ +find_package(OpenMP REQUIRED) + +add_executable(CorrelatorTest) +target_sources(CorrelatorTest PRIVATE CorrelatorTest.cc Options.cc) +target_include_directories(CorrelatorTest PRIVATE ${PROJECT_SOURCE_DIR}) #[[ root of this project, also when it is built as a subproject ]] +target_link_libraries( + CorrelatorTest PRIVATE tcc Record UnitTest OpenMP::OpenMP_CXX +) + +# Define various combinations of parameters to test: +# b: nrBits must be 4, 8, or 16 +# c: nrChannels +# d: deviceNumber +# n: nrReceivers +# N: nrReceiversPerBlock must be 32, 48, or 64 +# r: innerRepeatCount +# R: outerRepeatCount +# t: nrSamplesPerChannel must be a multiple of (128 / nrBits) +# V: verifyOutput +set(ARGS0 -b 4 -c 1 -n 1 -N 32 -r 1 -R 1 -t 32) +set(ARGS1 -b 8 -c 1 -n 1 -N 48 -r 1 -R 1 -t 16) +set(ARGS2 -b 16 -c 1 -n 1 -N 64 -r 1 -R 1 -t 8) +set(ARGS3 -b 16 -c 2 -n 3 -N 32 -r 4 -R 5 -t 64) + +foreach(idx RANGE 3) + add_test(NAME CorrelatorTest${idx} COMMAND CorrelatorTest ${ARGS${idx}}) +endforeach() + +# Add tests for nrReceivers with all primes below 768 +foreach(idx + 2 3 5 7 11 13 17 19 23 29 31 37 41 + 43 47 53 59 61 67 71 73 79 83 89 97 101 +103 107 109 113 127 131 137 139 149 151 157 163 167 +173 179 181 191 193 197 199 211 223 227 229 233 239 +241 251 257 263 269 271 277 281 283 293 307 311 313 +317 331 337 347 349 353 359 367 373 379 383 389 397 +401 409 419 421 431 433 439 443 449 457 461 463 467 +479 487 491 499 503 509 521 523 541 547 557 563 569 +571 577 587 593 599 601 607 613 617 619 631 641 643 +647 653 659 661 673 677 683 691 701 709 719 727 733 +739 743 751 757 761) + add_test(NAME 
CorrelatorTest-nrReceivers-${idx} + COMMAND CorrelatorTest -b 16 -c 1 -n ${idx} -N 32 -r 1 -R 1 -t 8 + ) +endforeach() diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc index 36fa1ce6ab673f6125b81da554c020fdbe88d2ae..9237b2e1f937fa8f7cd3c168d8d59cbadaa16600 100644 --- a/test/CorrelatorTest/CorrelatorTest.cc +++ b/test/CorrelatorTest/CorrelatorTest.cc @@ -1,18 +1,19 @@ -#include "test/Common/ComplexInt4.h" -#include "test/Common/Record.h" #include "test/CorrelatorTest/CorrelatorTest.h" #include "util/ExceptionPropagator.h" -#include <cudawrappers/nvrtc.hpp> #include <cstdlib> #include <cstring> #include <iostream> +#include <cudawrappers/nvrtc.hpp> + +#include "test/Common/ComplexInt4.h" +#include "test/Common/Record.h" + #define GNU_SOURCE #include <link.h> #include <omp.h> - CorrelatorTest::CorrelatorTest(const Options &options) : UnitTest(options.deviceNumber), @@ -223,17 +224,21 @@ template<typename SampleType, typename VisibilityType> void CorrelatorTest::veri int main(int argc, char *argv[]) { + int err{0}; try { cu::init(); Options options(argc, argv); CorrelatorTest test(options); } catch (cu::Error &error) { std::cerr << "cu::Error: " << error.what() << std::endl; + err = 1; } catch (nvrtc::Error &error) { std::cerr << "nvrtc::Error: " << error.what() << std::endl; + err = 1; } catch (Options::Error &error) { std::cerr << "Options::Error: " << error.what() << std::endl; + err = 1; } - return 0; + return err; } diff --git a/test/OpenCLCorrelatorTest/CMakeLists.txt b/test/OpenCLCorrelatorTest/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..84117b5edf3259e3350ac297af1fd786f642f4c2 --- /dev/null +++ b/test/OpenCLCorrelatorTest/CMakeLists.txt @@ -0,0 +1,20 @@ +project(OpenCLCorrelatorTest) +find_package(OpenMP REQUIRED) +find_package(OpenCL) + +if(OpenCL_FOUND) + # project() above rebinds PROJECT_SOURCE_DIR to this directory, so name the root project's source dir explicitly + set(KERNEL_SOURCE_FILE "${libtcc_SOURCE_DIR}/libtcc/kernel/TCCorrelator.cu") + 
add_executable(${PROJECT_NAME}) + target_sources(${PROJECT_NAME} PRIVATE OpenCLCorrelatorTest.cc) + target_include_directories( + ${PROJECT_NAME} PRIVATE ${libtcc_SOURCE_DIR} ${OpenCL_INCLUDE_DIRS} + ) + target_link_libraries( + ${PROJECT_NAME} PRIVATE ${OpenCL_LIBRARIES} OpenMP::OpenMP_CXX + ) + target_compile_definitions( + ${PROJECT_NAME} PRIVATE KERNEL_SOURCE_FILE="${KERNEL_SOURCE_FILE}" + ) + +endif() diff --git a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc index 518705463f662f1120428d643f40141dfb2d66dd..067e966e92fa1082d8c572414b5a183e6628ffd3 100644 --- a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc +++ b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc @@ -248,8 +248,8 @@ cl::Program createProgramFromBinaries(cl::Context &context, std::vector<cl::Devi << " -DNR_SAMPLES_PER_CHANNEL=" << NR_SAMPLES_PER_CHANNEL << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS << " -DNR_RECEIVERS_PER_BLOCK=" << NR_RECEIVERS_PER_BLOCK - << " -o -" - << " libtcc/TCCorrelator.cu" + << " -o - " + << KERNEL_SOURCE_FILE << "|sed -e s/.param\\ .[a-zA-Z0-9]*/\\&\\ .ptr\\ .global/"; std::clog << "executing: " << command.str() << std::endl; @@ -415,6 +415,7 @@ void checkTestPattern(cl::CommandQueue &queue, cl::Buffer &visibilitiesBuffer, c int main() { + int err{0}; try { cl::Context context; std::vector<cl::Device> devices; @@ -456,9 +457,11 @@ for (int i = 0; i < 100; i ++) checkTestPattern(queue, visibilities, samples); } catch (cl::Error &error) { std::cerr << "caught cl::Error: " << error.what() << ": " << errorMessage(error.err()) << std::endl; + err = 1; } catch (std::exception &error) { std::cerr << "caught std::exception: " << error.what() << std::endl; + err = 1; } - return 0; + return err; } diff --git a/test/SimpleExample/CMakeLists.txt b/test/SimpleExample/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..5def298c2857a4c7a5e7d387d74dcf4d6216af50 --- /dev/null +++ 
b/test/SimpleExample/CMakeLists.txt @@ -0,0 +1,7 @@ +add_executable(SimpleExample) +set_property(TARGET SimpleExample PROPERTY CUDA_ARCHITECTURES OFF) +target_sources(SimpleExample PRIVATE SimpleExample.cu) +target_include_directories(SimpleExample PRIVATE ${PROJECT_SOURCE_DIR}) #[[ root of this project, also when it is built as a subproject ]] +target_link_libraries(SimpleExample PRIVATE tcc cudawrappers::cu) + +add_test(NAME SimpleExample COMMAND SimpleExample) diff --git a/test/SimpleExample/SimpleExample.cu b/test/SimpleExample/SimpleExample.cu index ba862d35587c462431b81a10d61f097cd49c826b..25846a09bc822e7eefc254815bf0083a35596cac 100644 --- a/test/SimpleExample/SimpleExample.cu +++ b/test/SimpleExample/SimpleExample.cu @@ -7,14 +7,14 @@ #define NR_RECEIVERS_PER_BLOCK 64 #define NR_TIMES_PER_BLOCK (128 / (NR_BITS)) - -#include "test/Common/ComplexInt4.h" -#include "libtcc/Correlator.h" - #include <complex> #include <iostream> #include <cuda.h> + +#include "test/Common/ComplexInt4.h" +#include "libtcc/Correlator.h" + #if NR_BITS == 16 #include <cuda_fp16.h> #endif @@ -46,6 +46,7 @@ typedef Visibility Visibilities[NR_CHANNELS][NR_BASELINES][NR_POLARIZATIONS][NR_ int main() { + int err{0}; try { checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API checkCudaCall(cudaFree(0)); @@ -73,5 +74,7 @@ int main() checkCudaCall(cudaStreamDestroy(stream)); } catch (std::exception &error) { std::cerr << error.what() << std::endl; + err = 1; } + return err; }