diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 10b2ab9132c3ebecdc1c24611edf0c8d64ce390e..904d81afecd89cdf1cc4de5f233a0012fb84022e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -11,10 +11,11 @@ build:
     - das6
   script:
     - source scripts/load-modules.sh
-    - source scripts/build-cudawrappers.sh
+    - mkdir build && cd build
+    - cmake ..
     - make -j
 
-test-example:
+test:
   stage: testing
   tags:
     - das6
@@ -22,21 +23,8 @@ test-example:
     - build
   script:
     - source scripts/load-modules.sh
-    - source scripts/build-cudawrappers.sh
+    - mkdir build && cd build
+    - cmake .. -DBUILD_TESTING=On
     - make -j
-    - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc
-    - build-$(arch)/test/SimpleExample/SimpleExample
-
-test-correlator:
-  stage: testing
-  tags:
-    - das6
-  dependencies:
-    - build
-  script:
-    - source scripts/load-modules.sh
-    - source scripts/build-cudawrappers.sh
-    - make -j
-    - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(pwd)/libtcc
-    - build-$(arch)/test/CorrelatorTest/CorrelatorTest -n 20
-
+    # Run through ctest so the output of failing tests ends up in the job log
+    - ctest --output-on-failure
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 5c2de547ee7275e615821173bf63de8982dd7207..0000000000000000000000000000000000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "external/cuda-wrappers"]
-	path = external/cuda-wrappers
-	url = https://github.com/nlesc-recruit/CUDA-wrappers.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1906bf3706cbf187fb2040933db8da4ef90598dc
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,63 @@
+cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
+
+project(
+  libtcc
+  DESCRIPTION "Tensor-Core Correlator"
+  VERSION 0.5
+  HOMEPAGE_URL https://git.astron.nl/RD/tensor-core-correlator
+  LANGUAGES CXX CUDA
+)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+option(BUILD_SHARED_LIBS "Create shared libraries" True)
+option(BUILD_TESTING "Build tests" False)
+
+# Default to an optimized build when the user did not pick a build type
+if(NOT CMAKE_BUILD_TYPE)
+  set(CMAKE_BUILD_TYPE
+      "Release"
+      CACHE STRING "CMake build type" FORCE
+  )
+  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release")
+endif()
+
+# Defines CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_INCLUDEDIR, which the
+# install() rules below and in libtcc/ rely on
+include(GNUInstallDirs)
+
+find_package(CUDAToolkit REQUIRED)
+
+# Fetch cudawrappers and add it to this build
+include(FetchContent)
+FetchContent_Declare(
+  cudawrappers
+  GIT_REPOSITORY https://github.com/nlesc-recruit/cudawrappers
+  GIT_TAG 0.6.0
+)
+FetchContent_MakeAvailable(cudawrappers)
+
+# Set up libtcc
+add_subdirectory(libtcc)
+
+# Set up tests
+include(CTest)
+if(BUILD_TESTING)
+  add_subdirectory(test)
+endif()
+
+# Install project cmake targets
+include(CMakePackageConfigHelpers)
+write_basic_package_version_file(
+  ${PROJECT_NAME}-config-version.cmake
+  VERSION ${PROJECT_VERSION}
+  COMPATIBILITY AnyNewerVersion
+)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
+)
+
+# --- auto-ignore build directory
+if(NOT EXISTS ${PROJECT_BINARY_DIR}/.gitignore)
+  file(WRITE ${PROJECT_BINARY_DIR}/.gitignore "*")
+endif()
diff --git a/Makefile b/Makefile
deleted file mode 100644
index c062e208ae6df0e979d6730b9e1b1da3c3cd0f18..0000000000000000000000000000000000000000
--- a/Makefile
+++ /dev/null
@@ -1,117 +0,0 @@
-VERSION=		0.8
-CUDA=			$(shell dirname `dirname \`which nvcc\``)
-#CUDA=			/usr/local/cuda
-CUDA_INCLUDE=		$(shell dirname `find $(CUDA)/ -name cuda.h`)
-CUDA_LIBDIR=		$(shell dirname `find $(CUDA)/ -name libcuda.so`|head -n1)
-NVRTC_LIBDIR=		$(shell dirname `find $(CUDA)/ -name libnvrtc.so`|tail -n1)
-#POWER_SENSOR=		$(HOME)/projects/PowerSensor3
-ARCH=			$(shell arch)
-CC=			gcc
-CXX=			g++ #-Wno-deprecated-declarations
-NVCC=			nvcc
-INCLUDES=		-I.
-CUDAWRAPPERS_LIBDIR=external/cuda-wrappers/build-${ARCH}
-CUDAWRAPPERS_INCLUDE=external/cuda-wrappers/include
-CUDAWRAPPERSFLAGS=		-Xlinker -rpath=${CUDAWRAPPERS_LIBDIR} ${CUDAWRAPPERS_LIBDIR}/cudawrappers-cu.so ${CUDAWRAPPERS_LIBDIR}/cudawrappers-nvrtc.so
-INCLUDES+=		-I$(CUDA_INCLUDE) -I${CUDAWRAPPERS_INCLUDE}
-#INCLUDES+=		-I$(CUDA_INCLUDE) -I$(NVRTC_INCLUDE)
-#INCLUDES+=		-I$(POWER_SENSOR)/host/include
-CXXFLAGS+=		-std=c++17 -O3 -g -fpic -fopenmp $(INCLUDES) -DNDEBUG
-NVCCFLAGS=		-std c++14 $(INCLUDES)
-
-#CXXFLAGS+=		-march=core-avx2 -mcmodel=medium
-
-BUILD_DIR=		build-$(ARCH)
-BUILD_SUB_DIRS=		$(BUILD_DIR) $(BUILD_DIR)/libtcc $(BUILD_DIR)/test $(BUILD_DIR)/test/Common $(BUILD_DIR)/test/SimpleExample $(BUILD_DIR)/test/CorrelatorTest $(BUILD_DIR)/test/OpenCLCorrelatorTest
-
-LIBTCC_SOURCES=		libtcc/CorrelatorKernel.cc\
-			libtcc/Correlator.cc\
-			libtcc/Kernel.cc
-
-
-CORRELATOR_TEST_SOURCES=test/CorrelatorTest/CorrelatorTest.cc\
-			test/CorrelatorTest/Options.cc\
-			test/Common/Record.cc\
-			test/Common/UnitTest.cc
-
-OPENCL_TEST_SOURCES=	test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
-
-SIMPLE_EXAMPLE_SOURCES=	test/SimpleExample/SimpleExample.cu
-
-
-LIBTCC_OBJECTS=		$(LIBTCC_SOURCES:%.cc=$(BUILD_DIR)/%.o) $(BUILD_DIR)/libtcc/TCCorrelator.o
-SIMPLE_EXAMPLE_OBJECTS=	$(SIMPLE_EXAMPLE_SOURCES:%.cu=$(BUILD_DIR)/%.o)
-CORRELATOR_TEST_OBJECTS=$(CORRELATOR_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o)
-OPENCL_TEST_OBJECTS=	$(OPENCL_TEST_SOURCES:%.cc=$(BUILD_DIR)/%.o)
-
-OBJECTS=		$(LIBTCC_OBJECTS)\
-			$(SIMPLE_EXAMPLE_OBJECTS)\
-			$(CORRELATOR_TEST_OBJECTS)\
-			$(OPENCL_TEST_OBJECTS)
-
-SHARED_OBJECTS=		$(BUILD_DIR)/libtcc/libtcc.so $(BUILD_DIR)/libtcc/libtcc.so.$(VERSION)
-
-DEPENDENCIES=		$(OBJECTS:%.o=%.d)
-
-EXECUTABLES=		$(BUILD_DIR)/test/SimpleExample/SimpleExample\
-			$(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest
-
-ifneq			("$(wildcard $(CUDA_INCLUDE)/CL/cl.hpp)", "")
-EXECUTABLES+=		$(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest
-endif
-
-LIBRARIES=		-L$(CUDA_LIBDIR) -lcuda
-LIBRARIES+=		-L$(NVRTC_LIBDIR) -lnvrtc -Xlinker -rpath=$(NVRTC_LIBDIR)
-#LIBRARIES+=		-L$(POWER_SENSOR)/build-$(ARCH)/host -lPowerSensor -lnvidia-ml
-
-
-$(BUILD_DIR)/%.d:	%.cc
-			-$(CXX) $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
-
-$(BUILD_DIR)/%.d:	%.cu
-			-$(CXX) -x c++ $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
-
-$(BUILD_DIR)/%.o:	%.cc
-			$(CXX) $(CXXFLAGS) -o $@ -c $<
-
-$(BUILD_DIR)/%.o:	%.cu
-			$(NVCC) $(NVCCFLAGS) -o $@ -c $<
-
-$(BUILD_DIR)/%.s:	%.cc
-			$(CXX) $(CXXFLAGS) -o $@ -S $<
-
-$(BUILD_DIR)/%.so:	$(BUILD_DIR)/%.so.$(VERSION)
-			rm -f $@
-			ln -s $(@F).$(VERSION) $@
-
-all::			$(EXECUTABLES)
-
-clean::
-			rm -rf $(BUILD_DIR)
-
-$(OBJECTS) $(SHARED_OBJECTS) $(DEPENDENCIES) $(EXECUTABLES): $(BUILD_DIR)
-
-$(BUILD_DIR):
-			mkdir -p $(BUILD_SUB_DIRS)
-
-$(BUILD_DIR)/libtcc/TCCorrelator.o:	libtcc/TCCorrelator.cu	# CUDA code embedded in object file
-			ld -r -b binary -o $@ $<
-
-$(BUILD_DIR)/libtcc/TCCorrelator.d:
-			-
-
-$(BUILD_DIR)/libtcc/libtcc.so.$(VERSION): $(LIBTCC_OBJECTS)
-			$(CXX) -shared -o $@ $(LIBTCC_OBJECTS) $(LIBRARIES)
-
-$(BUILD_DIR)/test/SimpleExample/SimpleExample: $(SIMPLE_EXAMPLE_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so
-			$(NVCC) $(NVCCFLAGS) -o $@ $(SIMPLE_EXAMPLE_OBJECTS) -Xlinker -rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS}
-
-$(BUILD_DIR)/test/CorrelatorTest/CorrelatorTest: $(CORRELATOR_TEST_OBJECTS) $(BUILD_DIR)/libtcc/libtcc.so
-			$(CXX) $(CXXFLAGS) -o $@ $(CORRELATOR_TEST_OBJECTS) -Wl,-rpath=$(BUILD_DIR)/libtcc -L$(BUILD_DIR)/libtcc -ltcc $(LIBRARIES) ${CUDAWRAPPERSFLAGS}
-
-$(BUILD_DIR)/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest: $(OPENCL_TEST_OBJECTS)
-			$(CXX) $(CXXFLAGS) -o $@ $(OPENCL_TEST_OBJECTS) -L$(CUDA)/lib64 -lOpenCL
-
-ifeq (0, $(words $(findstring $(MAKECMDGOALS), clean)))
--include $(DEPENDENCIES)
-endif
diff --git a/README.md b/README.md
index d6cd33ca447b611eb786d246794d0713be0e512d..a1ee014fe75b6d980bdec002227e6d7fb0660a59 100644
--- a/README.md
+++ b/README.md
@@ -11,20 +11,7 @@ _Astronomy and Astrophysics_, 656(A32), pages 1-4, December 2021).
 
 ## Brief overview on how to use the Tensor-Core Correlator library:
 
-Clone the repository (`git clone --recursive`)
-
-Build [cudawrappers](https://github.com/nlesc-recruit/cudawrappers):
-```
-cd external/cuda-wrappers
-mkdir build-$(arch)
-cd build-$(arch)
-cmake -DCMAKE_INSTALL_PREFIX=$(pwd) ..
-make install
-cd ../../..
-```
-In a later release, cudawrappers wil be header-only and this step will no longer be needed.
-
-Build the library (just type `make`)
+Clone and build the repository; see below.
 
 Include `libtcc/Correlator.h`, and link with `libtcc/libtcc.so`.
 Create a `tcc::Correlator` object with the number of receivers, channels, etc.
@@ -76,4 +63,52 @@ Limitations:
 - the amount of samples over which is integrated) must be a multiple of 128 / `NR_BITS`
   (i.e., 32, 16, or 8 for 4-bit, 8-bit, or 16-bit input, respectively).
 
+## Building, testing, and installation
+Clone the repository:
+```bash
+git clone https://git.astron.nl/RD/tensor-core-correlator.git
+```
+
+To build and install the project, run:
+```bash
+cmake -S . -B build
+make -C build
+make -C build install
+```
+
+To install in a custom location, e.g. `~/.local`, run:
+```bash
+cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
+make -C build
+make -C build install
+```
+
+To compile and run the tests, run:
+```bash
+cmake -S. -B build -DBUILD_TESTING=ON
+make -C build
+make -C build test
+```
+The tests require a GPU.
+On the DAS-6/ASTRON cluster you can request a GPU node and run the tests with the command:
+```bash
+srun -N 1 --gres=gpu:A4000:1 make -C build test
+```
+
+Note that in the command above a node with an NVIDIA A4000 GPU is requested, because the tests require a GPU that has tensor cores.
+
+## Example usage
+The `example` subdirectory has a minimal example that demonstrates how this
+library can be integrated in another project. This example assumes that you
+pre-installed both this library (`libtcc`) and `cudawrappers`. E.g. when
+`libtcc` is installed in `<prefix>/libtcc` and `cudawrappers` is installed in
+`<prefix>/cudawrappers`, you can build the example by running:
+```bash
+cmake . -DCMAKE_PREFIX_PATH="<prefix>/cudawrappers;<prefix>/libtcc"
+make
+./example
+```
+
+
+## Bugs/feedback
 Contact John Romein (romein@astron.nl) to report bugs/feedback
diff --git a/cmake/tensor-core-correlator-config.cmake.in b/cmake/tensor-core-correlator-config.cmake.in
new file mode 100644
index 0000000000000000000000000000000000000000..f670102752c3e4a91da1e313572642831ffeb9da
--- /dev/null
+++ b/cmake/tensor-core-correlator-config.cmake.in
@@ -0,0 +1,9 @@
+# Package configuration template: locates the CUDA toolkit and then loads the
+# configuration file of every requested component.
+include(CMakeFindDependencyMacro)
+
+find_dependency(CUDAToolkit @CUDA_MIN_VERSION@)
+
+foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
+  include(${CMAKE_CURRENT_LIST_DIR}/${component}-config.cmake)
+endforeach()
diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..cdeba691a11e65b79417db4bc9f2cec7cb5a5f99
--- /dev/null
+++ b/example/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Minimal consumer project: builds example.cpp against pre-installed libtcc and
+# cudawrappers packages (point CMAKE_PREFIX_PATH at their install prefixes).
+cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
+project(example)
+
+find_package(CUDAToolkit REQUIRED)
+find_package(cudawrappers REQUIRED)
+find_package(libtcc REQUIRED)
+
+add_executable(${PROJECT_NAME} example.cpp)
+target_link_libraries(
+  ${PROJECT_NAME} PRIVATE tcc cudawrappers::cu cudawrappers::nvrtc
+)
diff --git a/example/example.cpp b/example/example.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..49d5b3b174a923bf9bd1c437f0bc55bc1e6f5895
--- /dev/null
+++ b/example/example.cpp
@@ -0,0 +1,27 @@
+#include <cudawrappers/cu.hpp>
+#include <libtcc/Correlator.h>
+
+namespace {
+// Correlator configuration used by this example
+constexpr int nrBits = 8;
+constexpr int nrChannels = 480;
+constexpr int nrPolarizations = 2;
+constexpr int nrSamplesPerChannel = 3072;
+constexpr int nrReceivers = 576;
+constexpr int nrReceiversPerBlock = 64;
+}  // namespace
+
+// Initializes CUDA, makes a context on device 0 current, and constructs a
+// tcc::Correlator for the configuration above.
+int main() {
+  cu::init();
+
+  cu::Device device(0);
+  cu::Context context(0, device);
+  context.setCurrent();
+
+  tcc::Correlator correlator(nrBits, nrReceivers, nrChannels,
+                             nrSamplesPerChannel, nrPolarizations,
+                             nrReceiversPerBlock);
+  return 0;
+}
diff --git a/external/cuda-wrappers b/external/cuda-wrappers
deleted file mode 160000
index d7f133c7e10c238d4bcceb2219f658eec64e9d7f..0000000000000000000000000000000000000000
--- a/external/cuda-wrappers
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d7f133c7e10c238d4bcceb2219f658eec64e9d7f
diff --git a/libtcc/CMakeLists.txt b/libtcc/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f28f6335c190b83a1c00216821873e2c559b7173
--- /dev/null
+++ b/libtcc/CMakeLists.txt
@@ -0,0 +1,27 @@
+# Create tcc library
+add_library(tcc)
+# Add source files
+target_sources(tcc PRIVATE Correlator.cc CorrelatorKernel.cc Kernel.cc)
+# Add public headers
+set_target_properties(
+  tcc PROPERTIES PUBLIC_HEADER "Correlator.h;CorrelatorKernel.h;Kernel.h"
+)
+# Add includes (project root in the build tree, so "libtcc/..." resolves)
+target_include_directories(
+  tcc PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
+             $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}>
+)
+# Add links
+target_link_libraries(tcc PRIVATE cudawrappers::cu cudawrappers::nvrtc)
+target_embed_source(tcc kernel/TCCorrelator.cu)
+# Install libraries and headers
+install(
+  TARGETS tcc
+  EXPORT ${PROJECT_NAME}-config # export tcc cmake targets
+  COMPONENT tcc
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
+)
+# Install tcc cmake targets
+install(EXPORT ${PROJECT_NAME}-config
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
+)
diff --git a/libtcc/Correlator.cc b/libtcc/Correlator.cc
index 14d54eef021df399c5032b6185f711e6a1fe56f5..a27af39c58211fdb792b5ae4a37a6efe8b7e2c13 100644
--- a/libtcc/Correlator.cc
+++ b/libtcc/Correlator.cc
@@ -7,7 +7,7 @@
 #include <link.h>
 
 
-extern const char _binary_libtcc_TCCorrelator_cu_start, _binary_libtcc_TCCorrelator_cu_end;
+extern const char _binary_kernel_TCCorrelator_cu_start, _binary_kernel_TCCorrelator_cu_end;
 
 namespace tcc {
 
@@ -56,7 +56,7 @@ cu::Module Correlator::compileModule(unsigned nrBits,
 				     const std::string &customStoreVisibility
 				    )
 {
-  cu::Device device(cu::Context::getCurrent().getDevice());
+  cu::Device device(0);
   int capability = 10 * device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR>() + device.getAttribute<CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR>();
 
   std::vector<std::string> options =
@@ -78,15 +78,8 @@ cu::Module Correlator::compileModule(unsigned nrBits,
 
   //std::for_each(options.begin(), options.end(), [] (const std::string &e) { std::cout << e << ' '; }); std::cout << std::endl;
 
-#if 0
-  nvrtc::Program program("tcc/TCCorrelator.cu");
-#else
-  // embed the CUDA source code in libtcc.so, so that it need not be installed separately
-  // for runtime compilation
-  // copy into std::string for '\0' termination
-  std::string source(&_binary_libtcc_TCCorrelator_cu_start, &_binary_libtcc_TCCorrelator_cu_end);
+  const std::string source(&_binary_kernel_TCCorrelator_cu_start, &_binary_kernel_TCCorrelator_cu_end);
   nvrtc::Program program(source, "TCCorrelator.cu");
-#endif
 
   try {
     program.compile(options);
diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h
index df733d54bcf7e07d68d8cb8dd01f273550063968..a26f56e5d81039f875c42a2d0947023b773306b3 100644
--- a/libtcc/Correlator.h
+++ b/libtcc/Correlator.h
@@ -1,12 +1,12 @@
 #if !defined TCC_CORRELATOR_H
 #define TCC_CORRELATOR_H
 
-#include "libtcc/CorrelatorKernel.h"
+#include <string>
+
 #include <cudawrappers/cu.hpp>
 #include <cudawrappers/nvrtc.hpp>
 
-#include <string>
-
+#include "libtcc/CorrelatorKernel.h"
 
 namespace tcc {
   class Correlator {
diff --git a/libtcc/Kernel.h b/libtcc/Kernel.h
index b9a1bf4045ec7c938220b6ba379be9434cdc343f..1663577f9796d56d02c25e29dcea2abcb127e302 100644
--- a/libtcc/Kernel.h
+++ b/libtcc/Kernel.h
@@ -5,7 +5,6 @@
 
 #include <stdint.h>
 
-
 namespace tcc {
   class Kernel
   {
diff --git a/libtcc/TCCorrelator.cu b/libtcc/kernel/TCCorrelator.cu
similarity index 100%
rename from libtcc/TCCorrelator.cu
rename to libtcc/kernel/TCCorrelator.cu
diff --git a/scripts/build-cudawrappers.sh b/scripts/build-cudawrappers.sh
deleted file mode 100644
index 8b9131eb52789ed4b8f4e1131459e466b75935ff..0000000000000000000000000000000000000000
--- a/scripts/build-cudawrappers.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-cd external/cuda-wrappers
-mkdir build-$(arch)
-cd build-$(arch)
-cmake -DCMAKE_INSTALL_PREFIX=$(pwd) ..
-make install
-cd ../../..
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ca29168a1b4ad4a5941186e493a3ae5dcd44a21
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Register every test subdirectory
+foreach(test_dir Common CorrelatorTest OpenCLCorrelatorTest SimpleExample)
+  add_subdirectory(${test_dir})
+endforeach()
diff --git a/test/Common/CMakeLists.txt b/test/Common/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0e6e222c9614cbd674199faa4c9828f7e759e10d
--- /dev/null
+++ b/test/Common/CMakeLists.txt
@@ -0,0 +1,8 @@
+# Build each shared test helper as its own library; headers are included
+# relative to the project root (e.g. "test/Common/Record.h")
+foreach(component Record UnitTest)
+  add_library(${component})
+  target_sources(${component} PRIVATE ${component}.cc)
+  target_include_directories(${component} PRIVATE ${PROJECT_SOURCE_DIR})
+  target_link_libraries(${component} PUBLIC cudawrappers::cu)
+endforeach()
diff --git a/test/Common/Record.h b/test/Common/Record.h
index 8ef3b0f6d4408a816d79ba4afdd52d5230de27df..439d15da7d778ce1238dad562ccc01757d881e0d 100644
--- a/test/Common/Record.h
+++ b/test/Common/Record.h
@@ -4,7 +4,6 @@
 #include "test/Common/Config.h"
 
 #include <cudawrappers/cu.hpp>
-
 #if defined MEASURE_POWER
 #include <powersensor/NVMLPowerSensor.h>
 #endif
diff --git a/test/Common/UnitTest.h b/test/Common/UnitTest.h
index cb28715fc216a91f13532cb4df7446d35ae486f7..e0bec26eebd317753033f7f68fa72263c93ebce5 100644
--- a/test/Common/UnitTest.h
+++ b/test/Common/UnitTest.h
@@ -1,8 +1,8 @@
 #if !defined UNIT_TEST_H
 #define UNIT_TEST_H
 
-#include "test/Common/Record.h"
 #include <cudawrappers/cu.hpp>
+#include "test/Common/Record.h"
 
 #if defined MEASURE_POWER
 #include <powersensor/NVMLPowerSensor.h>
diff --git a/test/CorrelatorTest/CMakeLists.txt b/test/CorrelatorTest/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..89098ba5b489e07e5bdf7ac96486fbe629625a95
--- /dev/null
+++ b/test/CorrelatorTest/CMakeLists.txt
@@ -0,0 +1,45 @@
+find_package(OpenMP REQUIRED)
+
+add_executable(CorrelatorTest)
+target_sources(CorrelatorTest PRIVATE CorrelatorTest.cc Options.cc)
+target_include_directories(CorrelatorTest PRIVATE ${PROJECT_SOURCE_DIR})
+target_link_libraries(
+  CorrelatorTest PRIVATE tcc Record UnitTest OpenMP::OpenMP_CXX
+)
+
+# Define various combinations of parameters to test:
+# b: nrBits must be 4, 8, or 16
+# c: nrChannels
+# d: deviceNumber
+# n: nrReceivers
+# N: nrReceiversPerBlock must be 32, 48, or 64
+# r: innerRepeatCount
+# R: outerRepeatCount
+# t: nrSamplesPerChannel must be a multiple of (128 / nrBits)
+# V: verifyOutput
+set(ARGS0 -b  4 -c 1 -n  1 -N 32 -r 1 -R 1 -t 32)
+set(ARGS1 -b  8 -c 1 -n  1 -N 48 -r 1 -R 1 -t 16)
+set(ARGS2 -b 16 -c 1 -n  1 -N 64 -r 1 -R 1 -t  8)
+set(ARGS3 -b 16 -c 2 -n  3 -N 32 -r 4 -R 5 -t 64)
+
+foreach(idx RANGE 3)
+  add_test(NAME CorrelatorTest${idx} COMMAND CorrelatorTest ${ARGS${idx}})
+endforeach()
+
+# Add tests for nrReceivers with all primes below 768
+foreach(idx
+  2     3     5     7    11    13    17    19    23    29    31    37    41
+ 43    47    53    59    61    67    71    73    79    83    89    97   101
+103   107   109   113   127   131   137   139   149   151   157   163   167
+173   179   181   191   193   197   199   211   223   227   229   233   239
+241   251   257   263   269   271   277   281   283   293   307   311   313
+317   331   337   347   349   353   359   367   373   379   383   389   397
+401   409   419   421   431   433   439   443   449   457   461   463   467
+479   487   491   499   503   509   521   523   541   547   557   563   569
+571   577   587   593   599   601   607   613   617   619   631   641   643
+647   653   659   661   673   677   683   691   701   709   719   727   733
+739   743   751   757   761)
+  add_test(NAME CorrelatorTest-nrReceivers-${idx}
+           COMMAND CorrelatorTest -b 16 -c 1 -n ${idx} -N 32 -r 1 -R 1 -t 8
+  )
+endforeach()
diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc
index 36fa1ce6ab673f6125b81da554c020fdbe88d2ae..9237b2e1f937fa8f7cd3c168d8d59cbadaa16600 100644
--- a/test/CorrelatorTest/CorrelatorTest.cc
+++ b/test/CorrelatorTest/CorrelatorTest.cc
@@ -1,18 +1,19 @@
-#include "test/Common/ComplexInt4.h"
-#include "test/Common/Record.h"
 #include "test/CorrelatorTest/CorrelatorTest.h"
 #include "util/ExceptionPropagator.h"
-#include <cudawrappers/nvrtc.hpp>
 
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 
+#include <cudawrappers/nvrtc.hpp>
+
+#include "test/Common/ComplexInt4.h"
+#include "test/Common/Record.h"
+
 #define GNU_SOURCE
 #include <link.h>
 #include <omp.h>
 
-
 CorrelatorTest::CorrelatorTest(const Options &options)
 :
   UnitTest(options.deviceNumber),
@@ -223,17 +224,21 @@ template<typename SampleType, typename VisibilityType> void CorrelatorTest::veri
 
 int main(int argc, char *argv[])
 {
+  int err{0};
   try {
     cu::init();
     Options options(argc, argv);
     CorrelatorTest test(options);
   } catch (cu::Error &error) {
     std::cerr << "cu::Error: " << error.what() << std::endl;
+    err = 1;
   } catch (nvrtc::Error &error) {
     std::cerr << "nvrtc::Error: " << error.what() << std::endl;
+    err = 1;
   } catch (Options::Error &error) {
     std::cerr << "Options::Error: " << error.what() << std::endl;
+    err = 1;
   }
 
-  return 0;
+  return err;
 }
diff --git a/test/OpenCLCorrelatorTest/CMakeLists.txt b/test/OpenCLCorrelatorTest/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..84117b5edf3259e3350ac297af1fd786f642f4c2
--- /dev/null
+++ b/test/OpenCLCorrelatorTest/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Optional OpenCL variant of the correlator test; it is only built when an
+# OpenCL installation is found.
+find_package(OpenCL)
+
+if(OpenCL_FOUND)
+  find_package(OpenMP REQUIRED)
+
+  add_executable(OpenCLCorrelatorTest)
+  target_sources(OpenCLCorrelatorTest PRIVATE OpenCLCorrelatorTest.cc)
+  target_include_directories(
+    OpenCLCorrelatorTest PRIVATE ${PROJECT_SOURCE_DIR}
+  )
+  target_link_libraries(
+    OpenCLCorrelatorTest PRIVATE OpenCL::OpenCL OpenMP::OpenMP_CXX
+  )
+  # The test puts this path on the command line of an external command it
+  # runs, so pass the full path of the kernel source.
+  target_compile_definitions(
+    OpenCLCorrelatorTest
+    PRIVATE
+      KERNEL_SOURCE_FILE="${PROJECT_SOURCE_DIR}/libtcc/kernel/TCCorrelator.cu"
+  )
+endif()
diff --git a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
index 518705463f662f1120428d643f40141dfb2d66dd..067e966e92fa1082d8c572414b5a183e6628ffd3 100644
--- a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
+++ b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
@@ -248,8 +248,8 @@ cl::Program createProgramFromBinaries(cl::Context &context, std::vector<cl::Devi
 	    << " -DNR_SAMPLES_PER_CHANNEL=" << NR_SAMPLES_PER_CHANNEL
 	    << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS
 	    << " -DNR_RECEIVERS_PER_BLOCK=" << NR_RECEIVERS_PER_BLOCK
-	    << " -o -"
-	    << " libtcc/TCCorrelator.cu"
+	    << " -o - "
+	    << KERNEL_SOURCE_FILE
 	    << "|sed -e s/.param\\ .[a-zA-Z0-9]*/\\&\\ .ptr\\ .global/";
 
     std::clog << "executing: " << command.str() << std::endl;
@@ -415,6 +415,7 @@ void checkTestPattern(cl::CommandQueue &queue, cl::Buffer &visibilitiesBuffer, c
 
 int main()
 {
+  int err{0};
   try {
     cl::Context             context;
     std::vector<cl::Device> devices;
@@ -456,9 +457,11 @@ for (int i = 0; i < 100; i ++)
     checkTestPattern(queue, visibilities, samples);
    } catch (cl::Error &error) {
      std::cerr << "caught cl::Error: " << error.what() << ": " << errorMessage(error.err()) << std::endl;
+     err = 1;
    } catch (std::exception &error) {
      std::cerr << "caught std::exception: " << error.what() << std::endl;
+     err = 1;
    }
 
-  return 0;
+  return err;
 }
diff --git a/test/SimpleExample/CMakeLists.txt b/test/SimpleExample/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5def298c2857a4c7a5e7d387d74dcf4d6216af50
--- /dev/null
+++ b/test/SimpleExample/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_executable(SimpleExample)
+# A false value makes CMake pass no architecture flags for this target
+set_property(TARGET SimpleExample PROPERTY CUDA_ARCHITECTURES OFF)
+target_sources(SimpleExample PRIVATE SimpleExample.cu)
+target_include_directories(SimpleExample PRIVATE ${PROJECT_SOURCE_DIR})
+target_link_libraries(SimpleExample PRIVATE tcc cudawrappers::cu)
+
+# Run the example as a test
+add_test(NAME SimpleExample COMMAND SimpleExample)
diff --git a/test/SimpleExample/SimpleExample.cu b/test/SimpleExample/SimpleExample.cu
index ba862d35587c462431b81a10d61f097cd49c826b..25846a09bc822e7eefc254815bf0083a35596cac 100644
--- a/test/SimpleExample/SimpleExample.cu
+++ b/test/SimpleExample/SimpleExample.cu
@@ -7,14 +7,14 @@
 #define NR_RECEIVERS_PER_BLOCK 64
 #define NR_TIMES_PER_BLOCK (128 / (NR_BITS))
 
-
-#include "test/Common/ComplexInt4.h"
-#include "libtcc/Correlator.h"
-
 #include <complex>
 #include <iostream>
 
 #include <cuda.h>
+
+#include "test/Common/ComplexInt4.h"
+#include "libtcc/Correlator.h"
+
 #if NR_BITS == 16
 #include <cuda_fp16.h>
 #endif
@@ -46,6 +46,7 @@ typedef Visibility Visibilities[NR_CHANNELS][NR_BASELINES][NR_POLARIZATIONS][NR_
 
 int main()
 {
+  int err{0};
   try {
     checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API
     checkCudaCall(cudaFree(0));
@@ -73,5 +74,7 @@ int main()
     checkCudaCall(cudaStreamDestroy(stream));
   } catch (std::exception &error) {
     std::cerr << error.what() << std::endl;
+    err = 1;
   }
+  return err;
 }