Skip to content
Snippets Groups Projects
Commit 83b057ec authored by John Romein's avatar John Romein
Browse files

Merge branch 'cmake-build2' into 'master'

Use CMake and upgrade to cudawrappers 0.6.0

See merge request !5
parents cebb8016 98a60f1f
No related branches found
No related tags found
1 merge request!5Use CMake and upgrade to cudawrappers 0.6.0
Pipeline #60172 passed
Showing
with 235 additions and 176 deletions
......@@ -11,10 +11,11 @@ build:
- das6
script:
- source scripts/load-modules.sh
- source scripts/build-cudawrappers.sh
- mkdir build && cd build
- cmake ..
- make -j
test-example:
test:
stage: testing
tags:
- das6
......@@ -22,21 +23,7 @@ test-example:
- build
script:
- source scripts/load-modules.sh
- source scripts/build-cudawrappers.sh
- mkdir build && cd build
- cmake .. -DBUILD_TESTING=On
- make -j
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc
- build-$(arch)/test/SimpleExample/SimpleExample
test-correlator:
stage: testing
tags:
- das6
dependencies:
- build
script:
- source scripts/load-modules.sh
- source scripts/build-cudawrappers.sh
- make -j
- export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc
- build-$(arch)/test/CorrelatorTest/CorrelatorTest -n 20
- make test
[submodule "external/cuda-wrappers"]
path = external/cuda-wrappers
url = https://github.com/nlesc-recruit/CUDA-wrappers.git
# Top-level build script for libtcc (Tensor-Core Correlator).
#
# Configures the project, fetches the cudawrappers dependency, builds the
# library in libtcc/, optionally builds the tests in test/, and installs a
# package version file next to the exported CMake targets.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)

project(
  libtcc
  DESCRIPTION "Tensor-Core Correlator"
  VERSION 0.5
  HOMEPAGE_URL https://git.astron.nl/RD/tensor-core-correlator
  LANGUAGES CXX CUDA
)

# C++17 is mandatory; fail at configure time instead of silently decaying to
# an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

option(BUILD_SHARED_LIBS "Create shared libraries" True)
# Declared before include(CTest) so that tests are off unless requested.
option(BUILD_TESTING "Build tests" False)

# Default to a Release build when the user did not choose a build type.
# CMAKE_BUILD_TYPE is meaningless for multi-config generators, so leave it
# alone there.
get_property(libtcc_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT libtcc_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE
      "Release"
      CACHE STRING "CMake build type" FORCE
  )
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
endif()

# Defines CMAKE_INSTALL_LIBDIR / CMAKE_INSTALL_INCLUDEDIR used by the install
# rules below and in libtcc/; do not rely on a dependency having included it.
include(GNUInstallDirs)

find_package(CUDAToolkit REQUIRED)

include(FetchContent)
FetchContent_Declare(
  cudawrappers
  GIT_REPOSITORY https://github.com/nlesc-recruit/cudawrappers
  GIT_TAG 0.6.0
)
FetchContent_MakeAvailable(cudawrappers)

# Set up libtcc
add_subdirectory(libtcc)

# Set up tests
include(CTest)
if(BUILD_TESTING)
  add_subdirectory(test)
endif()

# Install project cmake targets
include(CMakePackageConfigHelpers)
# The version file must advertise the version of this project, not the
# version of the cudawrappers dependency.
write_basic_package_version_file(
  "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
  VERSION ${PROJECT_VERSION}
  COMPATIBILITY AnyNewerVersion
)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}"
)

# --- auto-ignore build directory
if(NOT EXISTS "${PROJECT_BINARY_DIR}/.gitignore")
  file(WRITE "${PROJECT_BINARY_DIR}/.gitignore" "*")
endif()
# Legacy GNU Make build for libtcc, its example and its tests.
# All build products are placed under build-<arch>/.

# Version suffix appended to the shared library file name.
VERSION= 0.8
# CUDA root: two directory levels above the nvcc found on PATH.
CUDA= $(shell dirname `dirname \`which nvcc\``)
#CUDA= /usr/local/cuda
# Header and library directories are located by searching below $(CUDA).
CUDA_INCLUDE= $(shell dirname `find $(CUDA)/ -name cuda.h`)
CUDA_LIBDIR= $(shell dirname `find $(CUDA)/ -name libcuda.so`|head -n1)
NVRTC_LIBDIR= $(shell dirname `find $(CUDA)/ -name libnvrtc.so`|tail -n1)
#POWER_SENSOR= $(HOME)/projects/PowerSensor3
ARCH= $(shell arch)
CC= gcc
CXX= g++ #-Wno-deprecated-declarations
NVCC= nvcc
INCLUDES= -I.
# cudawrappers is expected to be built inside the external/cuda-wrappers tree.
CUDAWRAPPERS_LIBDIR=external/cuda-wrappers/build-${ARCH}
CUDAWRAPPERS_INCLUDE=external/cuda-wrappers/include
CUDAWRAPPERSFLAGS= -Xlinker -rpath=${CUDAWRAPPERS_LIBDIR} ${CUDAWRAPPERS_LIBDIR}/cudawrappers-cu.so ${CUDAWRAPPERS_LIBDIR}/cudawrappers-nvrtc.so
INCLUDES+= -I$(CUDA_INCLUDE) -I${CUDAWRAPPERS_INCLUDE}
#INCLUDES+= -I$(CUDA_INCLUDE) -I$(NVRTC_INCLUDE)
#INCLUDES+= -I$(POWER_SENSOR)/host/include
# Host code is compiled as C++17; .cu files are compiled by nvcc as C++14.
CXXFLAGS+= -std=c++17 -O3 -g -fpic -fopenmp $(INCLUDES) -DNDEBUG
NVCCFLAGS= -std c++14 $(INCLUDES)
#CXXFLAGS+= -march=core-avx2 -mcmodel=medium
BUILD_DIR= build-$(ARCH)
# Every output directory; all of them are created by the $(BUILD_DIR) rule.
BUILD_SUB_DIRS= $(BUILD_DIR) $(BUILD_DIR)/libtcc $(BUILD_DIR)/test $(BUILD_DIR)/test/Common $(BUILD_DIR)/test/SimpleExample $(BUILD_DIR)/test/CorrelatorTest $(BUILD_DIR)/test/OpenCLCorrelatorTest
# Source lists per product.
LIBTCC_SOURCES= libtcc/CorrelatorKernel.cc\
libtcc/Correlator.cc\
libtcc/Kernel.cc
CORRELATOR_TEST_SOURCES=test/CorrelatorTest/CorrelatorTest.cc\
test/CorrelatorTest/Options.cc\
test/Common/Record.cc\
test/Common/UnitTest.cc
OPENCL_TEST_SOURCES= test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
SIMPLE_EXAMPLE_SOURCES= test/SimpleExample/SimpleExample.cu
# Object lists mirror the source tree below $(BUILD_DIR); the library also
# gets TCCorrelator.o, which is produced by a dedicated rule further down.
LIBTCC_OBJECTS= $(LIBTCC_SOURCES:%.cc=$(BUILD_DIR)/%.o) $(BUILD_DIR)/libtcc/TCCorrelator.o
SIMPLE_EXAMPLE_OBJECTS= $(SIMPLE_EXAMPLE_SOURCES:%.cu=$(BUILD_DIR)/%.o)
CORRELATOR_TEST_OBJECTS=$(CORRELATOR_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o)
OPENCL_TEST_OBJECTS= $(OPENCL_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o)
OBJECTS= $(LIBTCC_OBJECTS)\
$(SIMPLE_EXAMPLE_OBJECTS)\
$(CORRELATOR_TEST_OBJECTS)\
$(OPENCL_TEST_OBJECTS)
SHARED_OBJECTS= $(BUILD_DIR)/libtcc/libtcc.so $(BUILD_DIR)/libtcc/libtcc.so.$(VERSION)
# One dependency (.d) file per object file.
DEPENDENCIES= $(OBJECTS:%.o=%.d)
EXECUTABLES= $(BUILD_DIR)/test/SimpleExample/SimpleExample\
$(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest
# The OpenCL test is only built when CL/cl.hpp exists in the CUDA include dir.
ifneq ("$(wildcard $(CUDA_INCLUDE)/CL/cl.hpp)", "")
EXECUTABLES+= $(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest
endif
LIBRARIES= -L$(CUDA_LIBDIR) -lcuda
LIBRARIES+= -L$(NVRTC_LIBDIR) -lnvrtc -Xlinker -rpath=$(NVRTC_LIBDIR)
#LIBRARIES+= -L$(POWER_SENSOR)/build-$(ARCH)/host -lPowerSensor -lnvidia-ml
# Pattern rules: dependency files (the leading '-' ignores errors), objects,
# and assembly listings.
$(BUILD_DIR)/%.d: %.cc
-$(CXX) $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
$(BUILD_DIR)/%.d: %.cu
-$(CXX) -x c++ $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
$(BUILD_DIR)/%.o: %.cc
$(CXX) $(CXXFLAGS) -o $@ -c $<
$(BUILD_DIR)/%.o: %.cu
$(NVCC) $(NVCCFLAGS) -o $@ -c $<
$(BUILD_DIR)/%.s: %.cc
$(CXX) $(CXXFLAGS) -o $@ -S $<
# The unversioned .so is a symlink to the versioned shared library.
$(BUILD_DIR)/%.so: $(BUILD_DIR)/%.so.$(VERSION)
rm -f $@
ln -s $(@F).$(VERSION) $@
all:: $(EXECUTABLES)
clean::
rm -rf $(BUILD_DIR)
# Every generated file depends on the build directory tree existing.
$(OBJECTS) $(SHARED_OBJECTS) $(DEPENDENCIES) $(EXECUTABLES): $(BUILD_DIR)
$(BUILD_DIR):
mkdir -p $(BUILD_SUB_DIRS)
$(BUILD_DIR)/libtcc/TCCorrelator.o: libtcc/TCCorrelator.cu # CUDA code embedded in object file
ld -r -b binary -o $@ $<
# TCCorrelator.o is not compiled, so its dependency file has an empty recipe.
$(BUILD_DIR)/libtcc/TCCorrelator.d:
-
$(BUILD_DIR)/libtcc/libtcc.so.$(VERSION): $(LIBTCC_OBJECTS)
$(CXX) -shared -o $@ $(LIBTCC_OBJECTS) $(LIBRARIES)
$(BUILD_DIR)/test/SimpleExample/SimpleExample: $(SIMPLE_EXAMPLE_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so
$(NVCC) $(NVCCFLAGS) -o $@ $(SIMPLE_EXAMPLE_OBJECTS) -Xlinker -rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS}
$(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest: $(CORRELATOR_TEST_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so
$(CXX) $(CXXFLAGS) -o $@ $(CORRELATOR_TEST_OBJECTS) -Wl,-rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS}
$(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest: $(OPENCL_TEST_OBJECTS)
$(CXX) $(CXXFLAGS) -o $@ $(OPENCL_TEST_OBJECTS) -L$(CUDA)/lib64 -lOpenCL
# Skip including the dependency files when the requested goal is "clean".
ifeq (0, $(words $(findstring $(MAKECMDGOALS), clean)))
-include $(DEPENDENCIES)
endif
......@@ -11,20 +11,7 @@ _Astronomy and Astrophysics_, 656(A32), pages 1-4, December 2021).
## Brief overview on how to use the Tensor-Core Correlator library:
Clone the repository (`git clone --recursive`)
Build [cudawrappers](https://github.com/nlesc-recruit/cudawrappers):
```
cd external/cuda-wrappers
mkdir build-$(arch)
cd build-$(arch)
cmake -DCMAKE_INSTALL_PREFIX=$(pwd) ..
make install
cd ../../..
```
In a later release, cudawrappers will be header-only and this step will no longer be needed.
Build the library (just type `make`)
Clone and build the repository (see "Building, testing, and installation" below).
Include `libtcc/Correlator.h`, and link with `libtcc/libtcc.so`.
Create a `tcc::Correlator` object with the number of receivers, channels, etc.
......@@ -76,4 +63,52 @@ Limitations:
- the amount of samples over which is integrated) must be a multiple of 128 / `NR_BITS`
(i.e., 32, 16, or 8 for 4-bit, 8-bit, or 16-bit input, respectively).
## Building, testing, and installation
Clone the repository:
```bash
git clone https://git.astron.nl/RD/tensor-core-correlator.git
```
To build and install the project, run:
```bash
cmake -S . -B build
make -C build
make -C build install
```
To install in a custom location, e.g. `~/.local`, run:
```bash
cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
make -C build
make -C build install
```
To compile and run the tests, run:
```bash
cmake -S . -B build -DBUILD_TESTING=ON
make -C build
make -C build test
```
The tests require a GPU.
On the DAS-6/ASTRON cluster you can request a GPU node and run the tests with the command:
```bash
srun -N 1 --gres=gpu:A4000:1 make -C build test
```
Note that the command above requests a node with an NVIDIA A4000 GPU, because the tests require a GPU that has tensor cores.
## Example usage
The `example` subdirectory has a minimal example that demonstrates how this
library can be integrated in another project. This example assumes that you
pre-installed both this library (`libtcc`) and `cudawrappers`. E.g. when
`libtcc` is installed in `<prefix>/libtcc` and `cudawrappers` is installed in
`<prefix>/cudawrappers`, you can build the example by running:
```bash
cmake . -DCMAKE_PREFIX_PATH="<prefix>/cudawrappers;<prefix>/libtcc"
make
./example
```
## Bugs/feedback
Contact John Romein (romein@astron.nl) to report bugs/feedback
# Package configuration template.
#
# Locates the CUDA toolkit this package depends on and then loads the
# per-component configuration file for every component the consumer asked for.
include(CMakeFindDependencyMacro)

# find_dependency forwards the REQUIRED/QUIET arguments of the consumer's own
# find_package call, so a missing CUDA toolkit marks this package as not found
# instead of aborting an optional lookup.
find_dependency(CUDAToolkit @CUDA_MIN_VERSION@)

foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
  include("${CMAKE_CURRENT_LIST_DIR}/${component}-config.cmake")
endforeach()
# Minimal consumer project: builds example.cpp against a pre-installed libtcc
# and cudawrappers.

# cmake_minimum_required must precede project() so that policies are set
# before the project and its languages are configured.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
project(example)

find_package(CUDAToolkit REQUIRED)
find_package(cudawrappers REQUIRED)
find_package(libtcc REQUIRED)

add_executable(${PROJECT_NAME} example.cpp)
# The executable is the final consumer, so its dependencies are PRIVATE.
target_link_libraries(
  ${PROJECT_NAME} PRIVATE tcc cudawrappers::cu cudawrappers::nvrtc
)
#include <cudawrappers/cu.hpp>
#include <libtcc/Correlator.h>
#define NR_BITS 8
#define NR_CHANNELS 480
#define NR_POLARIZATIONS 2
#define NR_SAMPLES_PER_CHANNEL 3072
#define NR_RECEIVERS 576
#define NR_RECEIVERS_PER_BLOCK 64
int main(int argc, char *argv[]) {
cu::init();
cu::Device device(0);
cu::Context context(0, device);
context.setCurrent();
tcc::Correlator correlator(NR_BITS, NR_RECEIVERS, NR_CHANNELS,
NR_SAMPLES_PER_CHANNEL, NR_POLARIZATIONS,
NR_RECEIVERS_PER_BLOCK);
}
Subproject commit d7f133c7e10c238d4bcceb2219f658eec64e9d7f
# Build and install rules for the tcc library.

# Provides CMAKE_INSTALL_INCLUDEDIR and CMAKE_INSTALL_LIBDIR used below.
include(GNUInstallDirs)

# Create tcc library (shared or static according to BUILD_SHARED_LIBS)
add_library(tcc)

# Add source files
target_sources(tcc PRIVATE Correlator.cc CorrelatorKernel.cc Kernel.cc)

# Add public headers
set_target_properties(
  tcc PROPERTIES PUBLIC_HEADER "Correlator.h;CorrelatorKernel.h;Kernel.h"
)

# Add includes. Headers are included as "libtcc/<name>.h", so the build-tree
# include root is this project's source root. PROJECT_SOURCE_DIR (rather than
# CMAKE_SOURCE_DIR) keeps this correct when libtcc is embedded in another
# project via add_subdirectory or FetchContent.
target_include_directories(
  tcc PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
             $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
)

# Add links
target_link_libraries(tcc PRIVATE cudawrappers::cu cudawrappers::nvrtc)

# Embed the CUDA kernel source in the library. target_embed_source is not
# defined in this file; presumably it is provided by cudawrappers (to be
# confirmed against the cudawrappers documentation).
target_embed_source(tcc kernel/TCCorrelator.cu)

# Install libraries and headers
install(
  TARGETS tcc
  EXPORT ${PROJECT_NAME}-config # export tcc cmake targets
  COMPONENT tcc
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
)

# Install tcc cmake targets
install(EXPORT ${PROJECT_NAME}-config
        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
)
......@@ -7,7 +7,7 @@
#include <link.h>
extern const char _binary_libtcc_TCCorrelator_cu_start, _binary_libtcc_TCCorrelator_cu_end;
extern const char _binary_kernel_TCCorrelator_cu_start, _binary_kernel_TCCorrelator_cu_end;
namespace tcc {
......@@ -56,7 +56,7 @@ cu::Module Correlator::compileModule(unsigned nrBits,
const std::string &customStoreVisibility
)
{
cu::Device device(cu::Context::getCurrent().getDevice());
cu::Device device(0);
int capability = 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
std::vector<std::string> options =
......@@ -78,15 +78,8 @@ cu::Module Correlator::compileModule(unsigned nrBits,
//std::for_each(options.begin(), options.end(), [] (const std::string &e) { std::cout << e << ' '; }); std::cout << std::endl;
#if 0
nvrtc::Program program("tcc/TCCorrelator.cu");
#else
// embed the CUDA source code in libtcc.so, so that it need not be installed separately
// for runtime compilation
// copy into std::string for '\0' termination
std::string source(&_binary_libtcc_TCCorrelator_cu_start, &_binary_libtcc_TCCorrelator_cu_end);
const std::string source(&_binary_kernel_TCCorrelator_cu_start, &_binary_kernel_TCCorrelator_cu_end);
nvrtc::Program program(source, "TCCorrelator.cu");
#endif
try {
program.compile(options);
......
#if !defined TCC_CORRELATOR_H
#define TCC_CORRELATOR_H
#include "libtcc/CorrelatorKernel.h"
#include <string>
#include <cudawrappers/cu.hpp>
#include <cudawrappers/nvrtc.hpp>
#include <string>
#include "libtcc/CorrelatorKernel.h"
namespace tcc {
class Correlator {
......
......@@ -5,7 +5,6 @@
#include <stdint.h>
namespace tcc {
class Kernel
{
......
File moved
# Build cudawrappers in external/cuda-wrappers/build-<arch> and install it
# into that same directory, then return to the starting directory.
cd external/cuda-wrappers
# -p: do not fail when the build directory is left over from an earlier run.
mkdir -p "build-$(arch)"
cd "build-$(arch)"
# Quote the prefix so that paths containing spaces survive word splitting.
cmake -DCMAKE_INSTALL_PREFIX="$(pwd)" ..
make install
cd ../../..
# Add every test subdirectory, in the same order as before.
foreach(test_dir Common CorrelatorTest OpenCLCorrelatorTest SimpleExample)
  add_subdirectory(${test_dir})
endforeach()
# Helper libraries shared by the tests: one library per listed component,
# each built from <component>.cc.
foreach(component Record UnitTest)
  add_library(${component})
  target_sources(${component} PRIVATE ${component}.cc)
  # Headers are included relative to this project's root ("test/Common/...").
  # PROJECT_SOURCE_DIR keeps that working when the project is embedded in
  # another build, where CMAKE_SOURCE_DIR points at the outer project.
  target_include_directories(${component} PRIVATE ${PROJECT_SOURCE_DIR})
  target_link_libraries(${component} PUBLIC cudawrappers::cu)
endforeach()
......@@ -4,7 +4,6 @@
#include "test/Common/Config.h"
#include <cudawrappers/cu.hpp>
#if defined MEASURE_POWER
#include <powersensor/NVMLPowerSensor.h>
#endif
......
#if !defined UNIT_TEST_H
#define UNIT_TEST_H
#include "test/Common/Record.h"
#include <cudawrappers/cu.hpp>
#include "test/Common/Record.h"
#if defined MEASURE_POWER
#include <powersensor/NVMLPowerSensor.h>
......
# Builds the CorrelatorTest executable and registers it with CTest for a set
# of parameter combinations.
find_package(OpenMP REQUIRED)

add_executable(CorrelatorTest)
target_sources(CorrelatorTest PRIVATE CorrelatorTest.cc Options.cc)
# Sources include headers relative to this project's root; use
# PROJECT_SOURCE_DIR so this also works when the project is a subproject.
target_include_directories(CorrelatorTest PRIVATE ${PROJECT_SOURCE_DIR})
target_link_libraries(
  CorrelatorTest PRIVATE tcc Record UnitTest OpenMP::OpenMP_CXX
)

# Define various combinations of parameters to test:
# b: nrBits must be 4, 8, or 16
# c: nrChannels
# d: deviceNumber
# n: nrReceivers
# N: nrReceiversPerBlock must be 32, 48, or 64
# r: innerRepeatCount
# R: outerRepeatCount
# t: nrSamplesPerChannel must be a multiple of (128 / nrBits)
# V: verifyOutput
set(ARGS0 -b 4 -c 1 -n 1 -N 32 -r 1 -R 1 -t 32)
set(ARGS1 -b 8 -c 1 -n 1 -N 48 -r 1 -R 1 -t 16)
set(ARGS2 -b 16 -c 1 -n 1 -N 64 -r 1 -R 1 -t 8)
set(ARGS3 -b 16 -c 2 -n 3 -N 32 -r 4 -R 5 -t 64)

# RANGE 3 iterates over 0..3 inclusive, i.e. ARGS0 through ARGS3.
foreach(idx RANGE 3)
  add_test(NAME CorrelatorTest${idx} COMMAND CorrelatorTest ${ARGS${idx}})
endforeach()

# Add tests for nrReceivers with all primes below 768
foreach(nr_receivers
        2 3 5 7 11 13 17 19 23 29 31 37 41
        43 47 53 59 61 67 71 73 79 83 89 97 101
        103 107 109 113 127 131 137 139 149 151 157 163 167
        173 179 181 191 193 197 199 211 223 227 229 233 239
        241 251 257 263 269 271 277 281 283 293 307 311 313
        317 331 337 347 349 353 359 367 373 379 383 389 397
        401 409 419 421 431 433 439 443 449 457 461 463 467
        479 487 491 499 503 509 521 523 541 547 557 563 569
        571 577 587 593 599 601 607 613 617 619 631 641 643
        647 653 659 661 673 677 683 691 701 709 719 727 733
        739 743 751 757 761)
  add_test(NAME CorrelatorTest-nrReceivers-${nr_receivers}
           COMMAND CorrelatorTest -b 16 -c 1 -n ${nr_receivers} -N 32 -r 1 -R 1
                   -t 8
  )
endforeach()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment