diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..1b33bd758f9abce1913504c535e918b92e788d72
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+Testing
+*.d
+*~
diff --git a/.gitmodules b/.gitmodules
index 5c2de547ee7275e615821173bf63de8982dd7207..4733efab20a95666128a3556825b9cde0f70a816 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,3 @@
 [submodule "external/cuda-wrappers"]
-	path = external/cuda-wrappers
+	path = external/cudawrappers
 	url = https://github.com/nlesc-recruit/CUDA-wrappers.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f33b535451c03b937d4bd38fa3292747ff9c5751
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,46 @@
+cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
+
+project(
+  tensor_core_correlator
+  DESCRIPTION "Tensor-Core Correlator"
+  VERSION 0.5
+  HOMEPAGE_URL https://git.astron.nl/RD/tensor-core-correlator
+  LANGUAGES CXX CUDA)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_BUILD_TYPE Release)
+option(BUILD_SHARED_LIBS "Create shared libraries" True)
+option(BUILD_TESTING "Build tests" False)
+
+# Use CMAKE_INSTALL_PREFIX when searching for libraries
+include(GNUInstallDirs)
+list(APPEND CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_FULL_LIBDIR})
+
+# Add cudawrappers dependency
+add_subdirectory(external/cudawrappers)
+
+# Set up libtcc
+add_subdirectory(libtcc)
+
+# Set up tests
+include(CTest)
+if(BUILD_TESTING)
+  add_subdirectory(test)
+endif()
+
+# Install project cmake targets
+include(CMakePackageConfigHelpers)
+configure_file(cmake/${PROJECT_NAME}-config.cmake.in
+               ${PROJECT_NAME}-config.cmake @ONLY)
+write_basic_package_version_file(
+  ${PROJECT_NAME}-config-version.cmake
+  VERSION ${cudawrappers_VERSION}
+  COMPATIBILITY AnyNewerVersion)
+install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake
+              ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake
+        DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
+
+# --- auto-ignore build directory
+if(NOT EXISTS ${PROJECT_BINARY_DIR}/.gitignore)
+  file(WRITE ${PROJECT_BINARY_DIR}/.gitignore "*")
+endif()
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 8ca79920818920c742bfa1389e8f014fe60c7f30..0000000000000000000000000000000000000000
--- a/Makefile
+++ /dev/null
@@ -1,113 +0,0 @@
-VERSION=		0.5
-CUDA=			$(shell dirname `dirname \`which nvcc\``)
-#CUDA=			/usr/local/cuda
-CUDA_INCLUDE=		$(shell dirname `find $(CUDA) -name cuda.h`)
-CUDA_LIBDIR=		$(shell dirname `find $(CUDA) -name libcuda.so`|head -n1)
-NVRTC_LIBDIR=		$(shell dirname `find $(CUDA) -name libnvrtc.so`|head -n1)
-#POWER_SENSOR=		$(HOME)/projects/libpowersensor-master/build
-ARCH=			$(shell arch)
-CC=			gcc
-CXX=			g++ #-Wno-deprecated-declarations
-NVCC=			nvcc
-INCLUDES=		-I.
-INCLUDES+=		-I$(CUDA_INCLUDE)
-#INCLUDES+=		-I$(CUDA_INCLUDE) -I$(NVRTC_INCLUDE)
-#INCLUDES+=		-I$(POWER_SENSOR)/include
-CXXFLAGS+=		-std=c++11 -O3 -g -fpic -fopenmp $(INCLUDES) -DNDEBUG
-NVCCFLAGS=		$(INCLUDES)
-
-#CXXFLAGS+=		-march=core-avx2 -mcmodel=medium
-
-LIBTCC_SOURCES=		libtcc/CorrelatorKernel.cc\
-			libtcc/Correlator.cc\
-			libtcc/Kernel.cc
-
-
-CORRELATOR_TEST_SOURCES=test/CorrelatorTest/CorrelatorTest.cc\
-			test/CorrelatorTest/Options.cc\
-			test/Common/Record.cc\
-			test/Common/UnitTest.cc
-
-OPENCL_TEST_SOURCES=	test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
-
-SIMPLE_EXAMPLE_SOURCES=	test/SimpleExample/SimpleExample.cu
-
-
-LIBTCC_OBJECTS=		$(LIBTCC_SOURCES:%.cc=%.o) libtcc/TCCorrelator.o
-SIMPLE_EXAMPLE_OBJECTS=	$(SIMPLE_EXAMPLE_SOURCES:%.cu=%.o)
-CORRELATOR_TEST_OBJECTS=$(CORRELATOR_TEST_SOURCES:%.cc=%.o)
-OPENCL_TEST_OBJECTS=	$(OPENCL_TEST_SOURCES:%.cc=%.o)
-
-OBJECTS=		$(LIBTCC_OBJECTS)\
-			$(SIMPLE_EXAMPLE_OBJECTS)\
-			$(CORRELATOR_TEST_OBJECTS)\
-			$(OPENCL_TEST_OBJECTS)
-
-SHARED_OBJECTS=		libtcc/libtcc.so libtcc/libtcc.so.$(VERSION)
-
-DEPENDENCIES=		$(OBJECTS:%.o=%.d)
-
-EXECUTABLES=		test/SimpleExample/SimpleExample\
-			test/CorrelatorTest/CorrelatorTest\
-			test/OpenCLCorrelatorTest/OpenCLCorrelatorTest
-
-CUDA_WRAPPERS_DIR=       external/cuda-wrappers
-CUDA_WRAPPERS_LIB=        $(CUDA_WRAPPERS_DIR)/libcu.so
-CUDA_WRAPPERS_INCLUDE=    $(CUDA_WRAPPERS_DIR)/cu
-#LIBTCC_OBJECTS+=         $(CUDA_WRAPPERS_LIB)
-
-LIBRARIES=		-L$(CUDA_LIBDIR) -lcuda\
-			$(CUDA_WRAPPERS_LIB) \
-			-L$(NVRTC_LIBDIR) -lnvrtc #\
-			#-L$(POWER_SENSOR)/lib -lpowersensor -lnvidia-ml
-
-
-%.d:			%.cc
-			-$(CXX) $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
-
-%.d:			%.cu
-			-$(CXX) -x c++ $(CXXFLAGS) -MM -MT $@ -MT ${@:%.d=%.o} -MT ${@:%.d=%.s} $< -o $@
-
-%.o:			%.cc
-			$(CXX) $(CXXFLAGS) -o $@ -c $<
-
-%.o:			%.cu
-			$(NVCC) $(NVCCFLAGS) -o $@ -c $<
-
-%.s:			%.cc
-			$(CXX) $(CXXFLAGS) -o $@ -S $<
-
-%.so:			%.so.$(VERSION)
-			rm -f $@
-			ln -s $(@F).$(VERSION) $@
-
-all::			$(EXECUTABLES)
-
-clean::
-			$(RM) $(OBJECTS) $(SHARED_OBJECTS) $(DEPENDENCIES) $(EXECUTABLES)
-
-$(CUDA_WRAPPERS_LIB):
-			cd $(CUDA_WRAPPERS_DIR) && cmake .
-			cd $(CUDA_WRAPPERS_DIR) && CPATH=$(CPATH):$(CUDA_INCLUDE) make
-
-libtcc/TCCorrelator.o:	libtcc/TCCorrelator.cu	# CUDA code embedded in object file
-			ld -r -b binary -o $@ $<
-
-libtcc/TCCorrelator.d:
-			-
-
-libtcc/libtcc.so.$(VERSION):			$(LIBTCC_OBJECTS) $(CUDA_WRAPPERS_LIB)
-			$(CXX) -shared -o $@ $^ $(LIBRARIES)
-
-test/SimpleExample/SimpleExample:		$(SIMPLE_EXAMPLE_OBJECTS) libtcc/libtcc.so
-			$(NVCC) $(NVCCFLAGS) -o $@ $(SIMPLE_EXAMPLE_OBJECTS) -Xlinker -rpath=$(CUDA_WRAPPERS_DIR) -Llibtcc -ltcc $(LIBRARIES)
-
-test/CorrelatorTest/CorrelatorTest:		$(CORRELATOR_TEST_OBJECTS) libtcc/libtcc.so
-			$(CXX) $(CXXFLAGS) -o $@ $(CORRELATOR_TEST_OBJECTS) -Wl,-rpath=$(CUDA_WRAPPERS_DIR) -Llibtcc -ltcc $(LIBRARIES)
-
-test/OpenCLCorrelatorTest/OpenCLCorrelatorTest:	$(OPENCL_TEST_OBJECTS)
-			$(CXX) $(CXXFLAGS) -o $@ $(OPENCL_TEST_OBJECTS) -L$(CUDA)/lib64 -lOpenCL
-
-ifeq (0, $(words $(findstring $(MAKECMDGOALS), clean)))
--include $(DEPENDENCIES)
-endif
diff --git a/README.md b/README.md
index 362d4e09392bf4e17b3666de817d8ae6124d8242..a52ae018de781cdee447e4136e12386dc6ea7233 100644
--- a/README.md
+++ b/README.md
@@ -65,4 +65,32 @@ Limitations:
 - the amount of samples over which is integrated) must be a multiple of 128 / `NR_BITS`
   (i.e., 32, 16, or 8 for 4-bit, 8-bit, or 16-bit input, respectively).
 
+## Building, testing, and installation
+
+To build and install the project, run:
+```bash
+cmake -S . -B build
+make -C build
+make -C build install
+```
+
+To install in a custom location, e.g. `~/.local`, run:
+```bash
+cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
+make -C build
+make -C build install
+```
+
+To compile and run the tests, run:
+```bash
+cmake -S. -B build -DBUILD_TESTING=ON
+make -C build
+make -C build test
+```
+The tests require a GPU.
+On the DAS cluster you can request a GPU node and run the tests with the command:
+```bash
+srun -N 1 -C gpunode --gres=gpu:1 make -C build test
+```
+
 Contact John Romein (romein@astron.nl) to report bugs/feedback
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..d20abf7468c715cdb5e3f26a706f1337201e680c
--- /dev/null
+++ b/cmake/Utils.cmake
@@ -0,0 +1,13 @@
+# Make it possible to #include cuda source code
+function(include_cuda_code target input_file)
+  # Save file containing cuda code as a C++ raw string literal
+  file(READ ${input_file} content)
+  set(delim "for_c++_include")
+  set(content "R\"${delim}(\n${content})${delim}\"")
+  set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${input_file}")
+  file(WRITE ${output_file} "${content}")
+  # Add save path to the include directories
+  get_filename_component(output_subdir ${input_file} DIRECTORY)
+  target_include_directories(
+    ${target} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${output_subdir}")
+endfunction(include_cuda_code)
diff --git a/cmake/tensor_core_correlator-config.cmake.in b/cmake/tensor_core_correlator-config.cmake.in
new file mode 100644
index 0000000000000000000000000000000000000000..f670102752c3e4a91da1e313572642831ffeb9da
--- /dev/null
+++ b/cmake/tensor_core_correlator-config.cmake.in
@@ -0,0 +1,7 @@
+include(CMakeFindDependencyMacro)
+
+find_package(CUDAToolkit @CUDA_MIN_VERSION@ REQUIRED)
+
+foreach(component ${@PROJECT_NAME@_FIND_COMPONENTS})
+  include(${CMAKE_CURRENT_LIST_DIR}/${component}-config.cmake)
+endforeach()
diff --git a/external/cuda-wrappers b/external/cuda-wrappers
deleted file mode 160000
index 884fbc7c69617f0b4fbc6696435272a488f49716..0000000000000000000000000000000000000000
--- a/external/cuda-wrappers
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 884fbc7c69617f0b4fbc6696435272a488f49716
diff --git a/external/cudawrappers b/external/cudawrappers
new file mode 160000
index 0000000000000000000000000000000000000000..ffacb0e3c649a4d8999b3c7959ce12dc03d25bf7
--- /dev/null
+++ b/external/cudawrappers
@@ -0,0 +1 @@
+Subproject commit ffacb0e3c649a4d8999b3c7959ce12dc03d25bf7
diff --git a/libtcc/CMakeLists.txt b/libtcc/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fae038adc5098046a736a99d2e7ab4d684345394
--- /dev/null
+++ b/libtcc/CMakeLists.txt
@@ -0,0 +1,26 @@
+# Create tcc library
+add_library(tcc)
+# Add source files
+target_sources(tcc PRIVATE Correlator.cc CorrelatorKernel.cc Kernel.cc)
+# Add public header
+set_target_properties(tcc PROPERTIES PUBLIC_HEADER Correlator.h)
+# Add includes
+target_include_directories(
+  tcc PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}>
+             $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>)
+# Add links
+target_link_libraries(tcc PUBLIC cudawrappers::cu cudawrappers::nvrtc)
+# Install libraries and headers
+install(
+  TARGETS tcc
+  EXPORT tcc-config # export tcc cmake targets
+  COMPONENT tcc
+  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
+# Install tcc cmake targets
+install(
+  EXPORT tcc-config
+  NAMESPACE ${PROJECT_NAME}::
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME})
+# Embed CUDA code
+include(${CMAKE_SOURCE_DIR}/cmake/Utils.cmake)
+include_cuda_code(tcc kernel/TCCorrelator.cu)
diff --git a/libtcc/Correlator.cc b/libtcc/Correlator.cc
index 856ea1069645c0990e79bd67f2a21c51151b39d1..99b47ceb68e449e78cddfb78516ae01f8be26bd4 100644
--- a/libtcc/Correlator.cc
+++ b/libtcc/Correlator.cc
@@ -82,7 +82,9 @@ cu::Module Correlator::compileModule(unsigned nrBits,
   // embed the CUDA source code in libtcc.so, so that it need not be installed separately
   // for runtime compilation
   // copy into std::string for '\0' termination
-  std::string source(&_binary_libtcc_TCCorrelator_cu_start, &_binary_libtcc_TCCorrelator_cu_end);
+  const std::string source =
+#include "TCCorrelator.cu"
+  ;
   nvrtc::Program program(source, "TCCorrelator.cu");
 #endif
 
diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h
index 0a14267b1d7549e320663485d5213c98d5a71c8f..8a5b626b9fdb9f32ac0c81642827f8f35f58c348 100644
--- a/libtcc/Correlator.h
+++ b/libtcc/Correlator.h
@@ -1,12 +1,12 @@
 #if !defined TCC_CORRELATOR_H
 #define TCC_CORRELATOR_H
 
-#include "libtcc/CorrelatorKernel.h"
-#include "external/cuda-wrappers/cu/cu.h"
-#include "external/cuda-wrappers/cu/nvrtc.h"
-
 #include <string>
 
+#include <cu.hpp>
+#include <nvrtc.hpp>
+
+#include "libtcc/CorrelatorKernel.h"
 
 namespace tcc {
   class Correlator {
diff --git a/libtcc/Kernel.h b/libtcc/Kernel.h
index a5d195c21f32e0a30b4f6cfc1a61478e28326dd0..1acf84c063050e41f5982fdc5f83b1f5af1a96f8 100644
--- a/libtcc/Kernel.h
+++ b/libtcc/Kernel.h
@@ -1,10 +1,9 @@
 #if !defined TCC_KERNEL_H
 #define TCC_KERNEL_H
 
-#include "external/cuda-wrappers/cu/cu.h"
-
 #include <stdint.h>
 
+#include <cu.hpp>
 
 namespace tcc {
   class Kernel
diff --git a/libtcc/TCCorrelator.cu b/libtcc/kernel/TCCorrelator.cu
similarity index 100%
rename from libtcc/TCCorrelator.cu
rename to libtcc/kernel/TCCorrelator.cu
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2ca29168a1b4ad4a5941186e493a3ae5dcd44a21
--- /dev/null
+++ b/test/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_subdirectory(Common)
+add_subdirectory(CorrelatorTest)
+add_subdirectory(OpenCLCorrelatorTest)
+add_subdirectory(SimpleExample)
diff --git a/test/Common/CMakeLists.txt b/test/Common/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0c142fa0716163bc11164c23c7f2ddbd9c539d08
--- /dev/null
+++ b/test/Common/CMakeLists.txt
@@ -0,0 +1,6 @@
+foreach(component Record UnitTest)
+  add_library(${component})
+  target_sources(${component} PRIVATE ${component}.cc)
+  target_include_directories(${component} PRIVATE ${CMAKE_SOURCE_DIR})
+  target_link_libraries(${component} PRIVATE cudawrappers::cu)
+endforeach()
diff --git a/test/Common/Record.h b/test/Common/Record.h
index fa65629c19ef398f97bbd9fcdd0011e14ec62143..9626fc2c2da64a4eaea29370e289b4c318cb6b08 100644
--- a/test/Common/Record.h
+++ b/test/Common/Record.h
@@ -1,9 +1,9 @@
 #if !defined RECORD_H
 #define RECORD_H
 
-#include "test/Common/Config.h"
+#include <cu.hpp>
 
-#include "external/cuda-wrappers/cu/cu.h"
+#include "test/Common/Config.h"
 
 #if defined MEASURE_POWER
 #include <powersensor/NVMLPowerSensor.h>
diff --git a/test/Common/UnitTest.h b/test/Common/UnitTest.h
index be3c090f487fae9f0cc757fb196beef8cba25b32..f452bd3696d4c64584e27be6e37e5e8b94f78b40 100644
--- a/test/Common/UnitTest.h
+++ b/test/Common/UnitTest.h
@@ -1,8 +1,9 @@
 #if !defined UNIT_TEST_H
 #define UNIT_TEST_H
 
+#include <cu.hpp>
+
 #include "test/Common/Record.h"
-#include "external/cuda-wrappers/cu/cu.h"
 
 #if defined MEASURE_POWER
 #include <powersensor/NVMLPowerSensor.h>
diff --git a/test/CorrelatorTest/CMakeLists.txt b/test/CorrelatorTest/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a4f67a91af1533fbf3c0dfb44b249bd304cca40a
--- /dev/null
+++ b/test/CorrelatorTest/CMakeLists.txt
@@ -0,0 +1,17 @@
+find_package(OpenMP REQUIRED)
+
+add_executable(CorrelatorTest)
+target_sources(CorrelatorTest PRIVATE CorrelatorTest.cc Options.cc)
+target_include_directories(CorrelatorTest PRIVATE ${CMAKE_SOURCE_DIR})
+target_link_libraries(CorrelatorTest PRIVATE tcc Record UnitTest
+                                             OpenMP::OpenMP_CXX)
+
+# Define various combinations of parameters to test:
+set(ARGS0 -b  4 -c 1 -n 1 -N 32 -r 1 -R 1 -t 32 -V 1)
+set(ARGS1 -b  8 -c 1 -n 1 -N 48 -r 1 -R 1 -t 16 -V 1)
+set(ARGS2 -b 16 -c 1 -n 1 -N 64 -r 1 -R 1 -t  8 -V 1)
+set(ARGS3 -b 16 -c 2 -n 3 -N 32 -r 4 -R 5 -t 64 -V 1)
+
+foreach(idx RANGE 3)
+  add_test(NAME CorrelatorTest${idx} COMMAND CorrelatorTest ${ARGS${idx}})
+endforeach()
diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc
index 33b33d01381d53f22fbbaee48bce2f47b73d6211..36a38c54c2b516c1faad773d575835f2fd0edbfe 100644
--- a/test/CorrelatorTest/CorrelatorTest.cc
+++ b/test/CorrelatorTest/CorrelatorTest.cc
@@ -1,18 +1,19 @@
-#include "test/Common/ComplexInt4.h"
-#include "test/Common/Record.h"
 #include "test/CorrelatorTest/CorrelatorTest.h"
-#include "util/ExceptionPropagator.h"
-#include "external/cuda-wrappers/cu/nvrtc.h"
 
 #include <cstdlib>
 #include <cstring>
 #include <iostream>
 
+#include <nvrtc.hpp>
+
+#include "test/Common/ComplexInt4.h"
+#include "test/Common/Record.h"
+#include "util/ExceptionPropagator.h"
+
 #define GNU_SOURCE
 #include <link.h>
 #include <omp.h>
 
-
 CorrelatorTest::CorrelatorTest(const Options &options)
 :
   UnitTest(options.deviceNumber),
@@ -223,17 +224,21 @@ template<typename SampleType, typename VisibilityType> void CorrelatorTest::veri
 
 int main(int argc, char *argv[])
 {
+  int err{0};
   try {
     cu::init();
     Options options(argc, argv);
     CorrelatorTest test(options);
   } catch (cu::Error &error) {
     std::cerr << "cu::Error: " << error.what() << std::endl;
+    err = 1;
   } catch (nvrtc::Error &error) {
     std::cerr << "nvrtc::Error: " << error.what() << std::endl;
+    err = 1;
   } catch (Options::Error &error) {
     std::cerr << "Options::Error: " << error.what() << std::endl;
+    err = 1;
   }
 
-  return 0;
+  return err;
 }
diff --git a/test/OpenCLCorrelatorTest/CMakeLists.txt b/test/OpenCLCorrelatorTest/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a73805a57a63102068fc16b02ac720748d9ba6e3
--- /dev/null
+++ b/test/OpenCLCorrelatorTest/CMakeLists.txt
@@ -0,0 +1,15 @@
+find_package(OpenMP REQUIRED)
+find_package(OpenCL REQUIRED)
+
+add_executable(OpenCLCorrelatorTest)
+target_sources(OpenCLCorrelatorTest PRIVATE OpenCLCorrelatorTest.cc)
+target_include_directories(OpenCLCorrelatorTest PRIVATE ${CMAKE_SOURCE_DIR}
+                                                        ${OpenCL_INCLUDE_DIRS})
+target_link_libraries(OpenCLCorrelatorTest PRIVATE ${OpenCL_LIBRARIES}
+                                                   OpenMP::OpenMP_CXX)
+
+add_test(
+  NAME OpenCLCorrelatorTest
+  COMMAND OpenCLCorrelatorTest
+  # Specify working directory so the kernel code is found
+  WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
diff --git a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
index 9099af31f97d882e73947e7f8528feefa4fdeb78..e33634cb43f0fde060cca7c5e24f8b70c419fd6a 100644
--- a/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
+++ b/test/OpenCLCorrelatorTest/OpenCLCorrelatorTest.cc
@@ -244,7 +244,7 @@ cl::Program createProgramFromBinaries(cl::Context &context, std::vector<cl::Devi
 	    << " -DNR_POLARIZATIONS=" << NR_POLARIZATIONS
 	    << " -DNR_RECEIVERS_PER_BLOCK=" << NR_RECEIVERS_PER_BLOCK
 	    << " -o -"
-	    << " libtcc/TCCorrelator.cu"
+	    << " libtcc/kernel/TCCorrelator.cu"
 	    << "|sed -e s/.param\\ .[a-zA-Z0-9]*/\\&\\ .ptr\\ .global/";
 
     std::clog << "executing: " << command.str() << std::endl;
@@ -402,6 +402,7 @@ void checkTestPattern(cl::CommandQueue &queue, cl::Buffer &visibilitiesBuffer, c
 
 int main()
 {
+  int err{0};
   try {
     cl::Context             context;
     std::vector<cl::Device> devices;
@@ -443,9 +444,11 @@ for (int i = 0; i < 100; i ++)
     checkTestPattern(queue, visibilities, samples);
    } catch (cl::Error &error) {
      std::cerr << "caught cl::Error: " << error.what() << ": " << errorMessage(error.err()) << std::endl;
+     err = 1;
    } catch (std::exception &error) {
      std::cerr << "caught std::exception: " << error.what() << std::endl;
+     err = 1;
    }
 
-  return 0;
+  return err;
 }
diff --git a/test/SimpleExample/CMakeLists.txt b/test/SimpleExample/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1933aad3bb3f77f73e1f74b2ee26aeb854382eb0
--- /dev/null
+++ b/test/SimpleExample/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_executable(SimpleExample)
+set_property(TARGET SimpleExample PROPERTY CUDA_ARCHITECTURES OFF)
+target_sources(SimpleExample PRIVATE SimpleExample.cu)
+target_include_directories(SimpleExample PRIVATE ${CMAKE_SOURCE_DIR})
+target_link_libraries(SimpleExample PRIVATE tcc)
+
+add_test(NAME SimpleExample COMMAND SimpleExample)
diff --git a/test/SimpleExample/SimpleExample.cu b/test/SimpleExample/SimpleExample.cu
index 4fa52ddd281d7483c89031dac05dbe1b7a669783..48dea2fd0c697653f681748cf196763716c3619a 100644
--- a/test/SimpleExample/SimpleExample.cu
+++ b/test/SimpleExample/SimpleExample.cu
@@ -7,14 +7,14 @@
 #define NR_RECEIVERS_PER_BLOCK 64
 #define NR_TIMES_PER_BLOCK (128 / (NR_BITS))
 
-
-#include "test/Common/ComplexInt4.h"
-#include "libtcc/Correlator.h"
-
 #include <complex>
 #include <iostream>
 
 #include <cuda.h>
+
+#include "test/Common/ComplexInt4.h"
+#include "libtcc/Correlator.h"
+
 #if NR_BITS == 16
 #include <cuda_fp16.h>
 #endif
@@ -46,6 +46,7 @@ typedef Visibility Visibilities[NR_CHANNELS][NR_BASELINES][NR_POLARIZATIONS][NR_
 
 int main()
 {
+  int err{0};
   try {
     checkCudaCall(cudaSetDevice(0)); // combine the CUDA runtime API and CUDA driver API
     checkCudaCall(cudaFree(0));
@@ -73,5 +74,7 @@ int main()
     checkCudaCall(cudaStreamDestroy(stream));
   } catch (std::exception &error) {
     std::cerr << error.what() << std::endl;
+    err = 1;
   }
+  return err;
 }