diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h
index 723361f8109930ddf789c7e63a33d62a9c5d6d1e..7b33fa2f3fdffda90843ed22efdb1ab7c8b3638c 100644
--- a/libtcc/Correlator.h
+++ b/libtcc/Correlator.h
@@ -1,5 +1,5 @@
-#if !defined CORRELATOR_H
-#define CORRELATOR_H
+#if !defined TCC_CORRELATOR_H
+#define TCC_CORRELATOR_H
 
 #include "libtcc/CorrelatorKernel.h"
 #include "util/cu.h"
@@ -17,7 +17,7 @@ namespace tcc {
 		 unsigned nrReceiversPerBlock = 64
 		); // throw (cu::Error, nvrtc::Error)
 
-      void launchAsync(cu::Stream &, cu::DeviceMemory &isibilities, cu::DeviceMemory &samples); // throw (cu::Error)
+      void launchAsync(cu::Stream &, cu::DeviceMemory &visibilities, cu::DeviceMemory &samples); // throw (cu::Error)
       void launchAsync(CUstream, CUdeviceptr visibilities, CUdeviceptr samples); // throw (cu::Error)
 
       uint64_t FLOPS() const;
diff --git a/libtcc/CorrelatorKernel.h b/libtcc/CorrelatorKernel.h
index e28a2fdb197bcf08ff566331c6b2a1259dc7d60b..a0a92f1cd65e6859984b2a027bb54cfed2d6d1bf 100644
--- a/libtcc/CorrelatorKernel.h
+++ b/libtcc/CorrelatorKernel.h
@@ -1,5 +1,5 @@
-#if !defined CORRELATOR_KERNEL_H
-#define CORRELATOR_KERNEL_H
+#if !defined TCC_CORRELATOR_KERNEL_H
+#define TCC_CORRELATOR_KERNEL_H
 
 #include "libtcc/Kernel.h"
 
diff --git a/libtcc/Kernel.h b/libtcc/Kernel.h
index f6c9ab410017c32c92fb3e2356a2546a94a76b49..63329ee32a91a50e6c261b5f5f48e97bfbe533df 100644
--- a/libtcc/Kernel.h
+++ b/libtcc/Kernel.h
@@ -1,5 +1,5 @@
-#if !defined KERNEL_H
-#define KERNEL_H
+#if !defined TCC_KERNEL_H
+#define TCC_KERNEL_H
 
 #include "util/cu.h"
 
diff --git a/test/Common/Config.h b/test/Common/Config.h
index 0ecedb4b6185e0741431a3067fa137ffa684bae6..fcacde73d55f489feb1b820504ebbffcfa960305 100644
--- a/test/Common/Config.h
+++ b/test/Common/Config.h
@@ -1,5 +1,5 @@
-#if !defined CONFIG_H
-#define CONFIG_H
+#if !defined TCC_CONFIG_H
+#define TCC_CONFIG_H
 
 #if defined __ARM_ARCH
 #define UNIFIED_MEMORY // assume this is a Jetson Xavier
diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc
index 23bc7c156344e3f78766f4422d49e53cb09b81f9..2735a36378a9545b678b78b089623571e107d66c 100644
--- a/test/CorrelatorTest/CorrelatorTest.cc
+++ b/test/CorrelatorTest/CorrelatorTest.cc
@@ -101,7 +101,7 @@ template <typename SampleType, typename VisibilityType> void CorrelatorTest::doT
 	    hostToDeviceStream.memcpyHtoDAsync(deviceSamples, hostSamples, samplesRef.bytesize());
 	    hostToDeviceRecordStop.enqueue(hostToDeviceStream);
 
-	    stream.waitEvent(hostToDeviceRecordStop.event);
+	    stream.wait(hostToDeviceRecordStop.event);
 #endif
 
 	    computeRecordStart.enqueue(stream);
@@ -112,7 +112,7 @@ template <typename SampleType, typename VisibilityType> void CorrelatorTest::doT
 	    computeRecordStop.enqueue(stream);
 
 #if !defined UNIFIED_MEMORY
-	    deviceToHostStream.waitEvent(computeRecordStop.event);
+	    deviceToHostStream.wait(computeRecordStop.event);
 	    deviceToHostRecordStart.enqueue(deviceToHostStream);
 	    deviceToHostStream.memcpyDtoHAsync(hostVisibilities, deviceVisibilities, visibilitiesRef.bytesize());
 	    deviceToHostRecordStop.enqueue(deviceToHostStream);
diff --git a/test/CorrelatorTest/Options.h b/test/CorrelatorTest/Options.h
index 9e51307fcb5e2eefdabf08b11c04119bbab433e6..441e1fae0d0623f37cb5fd28c424a81fcb82c3cd 100644
--- a/test/CorrelatorTest/Options.h
+++ b/test/CorrelatorTest/Options.h
@@ -1,5 +1,5 @@
-#if !defined OPTIONS_H
-#define OPTIONS_H
+#if !defined TCC_OPTIONS_H
+#define TCC_OPTIONS_H
 
 #include <exception>
 #include <string>
diff --git a/util/cu.h b/util/cu.h
index 46dfdea4dce9cbb44ee4b26b6f45331cebc49175..72f6cf575a7512b097539202a623b0f089ed951a 100644
--- a/util/cu.h
+++ b/util/cu.h
@@ -5,6 +5,7 @@
 #include <cuda_runtime_api.h>
 #include <exception>
 #include <fstream>
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -61,21 +62,18 @@ namespace cu {
   template <typename T> class Wrapper
   {
     public:
-      // Wrapper<T>(Wrapper<T> &) = delete; // disallow copies
-
       // conversion to C-style T
 
-      operator const T & () const
+      operator T () const
       {
 	return _obj;
       }
 
-      operator T & ()
+      operator T ()
       {
 	return _obj;
       }
 
-#if 0 // makes no sense if object is not copyable
       bool operator == (const Wrapper<T> &other)
       {
 	return _obj == other._obj;
@@ -85,24 +83,35 @@ namespace cu {
       {
 	return _obj != other._obj;
       }
-#endif
 
     protected:
       Wrapper<T>()
+      {
+      }
+
+      Wrapper<T>(const Wrapper<T> &other)
+      :
+	_obj(other._obj),
+	manager(other.manager)
+      {
+      }
+
+      Wrapper<T>(Wrapper<T> &&other) noexcept // copying a raw handle and moving a shared_ptr cannot throw; lets containers move instead of copy
       :
-        hasOwnership(true)
+	_obj(other._obj),
+	manager(std::move(other.manager))
       {
+	other._obj = 0; // the moved-from wrapper no longer refers to the handle
       }
 
       Wrapper<T>(T &obj)
       :
-        _obj(obj),
-        hasOwnership(false)
+        _obj(obj)
       {
       }
 
-      bool hasOwnership;
       T _obj;
+      std::shared_ptr<T> manager;
   };
   
   class Device : public Wrapper<CUdevice>
@@ -185,6 +194,7 @@ namespace cu {
         _primaryContext(false)
       {
 	checkCudaCall(cuCtxCreate(&_obj, flags, device));
+	manager = std::shared_ptr<CUcontext>(new CUcontext(_obj), [] (CUcontext *ptr) { if (*ptr) cuCtxDestroy(*ptr); delete ptr; });
       }
 
       Context(CUcontext context)
@@ -194,14 +204,6 @@ namespace cu {
       {
       }
 
-      ~Context()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuCtxDestroy(_obj));
-	//else
-	  //checkCudaCall(cuDevicePrimaryCtxRelease(getDevice())); // FIXME
-      }
-
       unsigned getApiVersion() const
       {
 	unsigned version;
@@ -225,7 +227,7 @@ namespace cu {
       {
 	CUcontext context;
 	checkCudaCall(cuCtxGetCurrent(&context));
-	return Context(context);
+	return Context(context); // return the prvalue directly; wrapping it in std::move would only inhibit copy elision
       }
 
       void setCurrent() const
@@ -303,11 +305,7 @@ namespace cu {
       HostMemory(size_t size, int flags = 0)
       {
 	checkCudaCall(cuMemHostAlloc(&_obj, size, flags));
-      }
-
-      ~HostMemory()
-      {
-	checkCudaCall(cuMemFreeHost(_obj));
+	manager = std::shared_ptr<void *>(new (void *)(_obj), [] (void **ptr) { cuMemFreeHost(*ptr); delete ptr; });
       }
 
       template <typename T> operator T * ()
@@ -323,6 +321,7 @@ namespace cu {
       DeviceMemory(size_t size)
       {
 	checkCudaCall(cuMemAlloc(&_obj, size));
+	manager = std::shared_ptr<CUdeviceptr>(new CUdeviceptr(_obj), [] (CUdeviceptr *ptr) { cuMemFree(*ptr); delete ptr; });
       }
 
       DeviceMemory(CUdeviceptr ptr)
@@ -331,32 +330,15 @@ namespace cu {
       {
       }
 
-      DeviceMemory(HostMemory &hostMemory)
+      DeviceMemory(const HostMemory &hostMemory)
       {
-	hasOwnership = false;
 	checkCudaCall(cuMemHostGetDevicePointer(&_obj, hostMemory, 0));
       }
 
-      ~DeviceMemory()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuMemFree(_obj));
-      }
-
       const void *parameter() const // used to construct parameter list for launchKernel();
       {
 	return &_obj;
       }
-
-      template <typename T> operator T * () const
-      {
-	return (T *) _obj;
-      }
-
-      template <typename T> operator const T * () const
-      {
-	return (const T *) _obj;
-      }
   };
 
 
@@ -366,6 +348,7 @@ namespace cu {
       Array(unsigned width, CUarray_format format, unsigned numChannels)
+      :
+        Array(width, 0, format, numChannels) // delegate (height 0); a constructor call inside the body would only build a temporary and leave _obj unset
       {
-	Array(width, 0, format, numChannels);
       }
 
       Array(unsigned width, unsigned height, CUarray_format format, unsigned numChannels)
@@ -376,6 +359,7 @@ namespace cu {
 	descriptor.Format      = format;
 	descriptor.NumChannels = numChannels;
 	checkCudaCall(cuArrayCreate(&_obj, &descriptor));
+	manager = std::shared_ptr<CUarray>(new CUarray(_obj), [] (CUarray *ptr) { cuArrayDestroy(*ptr); delete ptr; });
       }
 
       Array(unsigned width, unsigned height, unsigned depth, CUarray_format format, unsigned numChannels)
@@ -388,6 +372,7 @@ namespace cu {
 	descriptor.NumChannels = numChannels;
 	descriptor.Flags       = 0;
 	checkCudaCall(cuArray3DCreate(&_obj, &descriptor));
+	manager = std::shared_ptr<CUarray>(new CUarray(_obj), [] (CUarray *ptr) { cuArrayDestroy(*ptr); delete ptr; });
       }
 
       Array(CUarray &array)
@@ -395,79 +380,6 @@ namespace cu {
         Wrapper(array)
       {
       }
-
-      ~Array()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuArrayDestroy(_obj));
-      }
-  };
-
-
-#if 0
-  class TexObject : public Wrapper<CUtexObject> {
-    public:
-      TexObject(onst CUDA_RESOURCE_DESC *pResDesc, const CUDA_TEXTURE_DESC *pTexDesc, const CUDA_RESOURCE_VIEW_DESC *pResViewDesc)
-      {
-	checkCudaCall(cuTexObjectCreate(&_obj, pResDesc, pTexDesc, pResViewDesc));
-      }
-
-      TexObject(CUtexObject &obj)
-      :
-        Wrapper<CUtexObject>(obj)
-      {
-      }
-
-      ~TexObject()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuTexObjectDestroy(_obj));
-      }
-
-
-  };
-#endif
-
-
-  class TexRef : public Wrapper<CUtexref> {
-    public:
-      TexRef(CUtexref texref)
-      : 
-        Wrapper<CUtexref>(texref)
-      {
-      }
-
-#if 0 // deprecated
-      void setAddress(size_t &byte_offset, DeviceMemory &memory, size_t size)
-      {
-	checkCudaCall(cuTexRefSetAddress(&byte_offset, _obj, memory, size));
-      }
-
-      void setArray(Array &array, unsigned flags = CU_TRSA_OVERRIDE_FORMAT)
-      {
-	checkCudaCall(cuTexRefSetArray(_obj, array, flags));
-      }
-
-      void setAddressMode(int dim, CUaddress_mode am)
-      {
-	checkCudaCall(cuTexRefSetAddressMode(_obj, dim, am));
-      }
-
-      void setFilterMode(CUfilter_mode fm)
-      {
-	checkCudaCall(cuTexRefSetFilterMode(_obj, fm));
-      }
-
-      void setFlags(int flags)
-      {
-	checkCudaCall(cuTexRefSetFlags(_obj, flags));
-      }
-
-      void setFormat(CUarray_format fmt, int numPackedComponents)
-      {
-	checkCudaCall(cuTexRefSetFormat(_obj, fmt, numPackedComponents));
-      }
-#endif
   };
 
 
@@ -499,11 +411,13 @@ namespace cu {
 #else
 	checkCudaCall(cuModuleLoad(&_obj, file_name));
 #endif
+	manager = std::shared_ptr<CUmodule>(new CUmodule(_obj), [] (CUmodule *ptr) { cuModuleUnload(*ptr); delete ptr; });
       }
 
       Module(const void *data)
       {
 	checkCudaCall(cuModuleLoadData(&_obj, data));
+	manager = std::shared_ptr<CUmodule>(new CUmodule(_obj), [] (CUmodule *ptr) { cuModuleUnload(*ptr); delete ptr; });
       }
 
       Module(CUmodule &module)
@@ -512,18 +426,14 @@ namespace cu {
       {
       }
 
-      ~Module()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuModuleUnload(_obj));
-      }
-
+#if 0
       TexRef getTexRef(const char *name) const
       {
 	CUtexref texref;
 	checkCudaCall(cuModuleGetTexRef(&texref, _obj, name));
 	return TexRef(texref);
       }
+#endif
 
       CUdeviceptr getGlobal(const char *name) const
       {
@@ -537,7 +447,7 @@ namespace cu {
   class Function : public Wrapper<CUfunction>
   {
     public:
-      Function(Module &module, const char *name)
+      Function(const Module &module, const char *name)
       {
 	checkCudaCall(cuModuleGetFunction(&_obj, module, name));
       }
@@ -559,13 +469,6 @@ namespace cu {
       {
 	checkCudaCall(cuFuncSetCacheConfig(_obj, config));
       }
-
-#if 0
-      void paramSetTexRef(TexRef &texref)
-      {
-	checkCudaCall(cuParamSetTexRef(_obj, CU_PARAM_TR_DEFAULT, texref));
-      }
-#endif
   };
 
 
@@ -575,6 +478,7 @@ namespace cu {
       Event(int flags = CU_EVENT_DEFAULT)
       {
 	checkCudaCall(cuEventCreate(&_obj, flags));
+	manager = std::shared_ptr<CUevent>(new CUevent(_obj), [] (CUevent *ptr) { cuEventDestroy(*ptr); delete ptr; });
       }
 
       Event(CUevent &event)
@@ -583,16 +487,10 @@ namespace cu {
       {
       }
 
-      ~Event()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuEventDestroy(_obj));
-      }
-
-      float elapsedTime(Event &second) const
+      float elapsedTime(const Event &start) const
       {
 	float ms;
-	checkCudaCall(cuEventElapsedTime(&ms, second, _obj));
+	checkCudaCall(cuEventElapsedTime(&ms, start, _obj));
 	return ms;
       }
 
@@ -623,6 +521,7 @@ namespace cu {
       Stream(int flags = CU_STREAM_DEFAULT)
       {
 	checkCudaCall(cuStreamCreate(&_obj, flags));
+	manager = std::shared_ptr<CUstream>(new CUstream(_obj), [] (CUstream *ptr) { cuStreamDestroy(*ptr); delete ptr; });
       }
 
       Stream(CUstream stream)
@@ -631,12 +530,6 @@ namespace cu {
       {
       }
 
-      ~Stream()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuStreamDestroy(_obj));
-      }
-
       void memcpyHtoDAsync(CUdeviceptr devPtr, const void *hostPtr, size_t size)
       {
 	checkCudaCall(cuMemcpyHtoDAsync(devPtr, hostPtr, size, _obj));
@@ -669,7 +562,7 @@ namespace cu {
 	checkCudaCall(cuStreamSynchronize(_obj));
       }
 
-      void waitEvent(Event &event)
+      void wait(const Event &event) // the event is only read (converted to its CUevent handle), so accept it by const reference
       {
 	checkCudaCall(cuStreamWaitEvent(_obj, event, 0));
       }
@@ -700,7 +593,7 @@ namespace cu {
       }
   };
 
-#if 1
+#if 0
   class Graph : public Wrapper<CUgraph>
   {
     public:
@@ -746,6 +639,7 @@ namespace cu {
       Graph(unsigned flags = 0)
       {
 	checkCudaCall(cuGraphCreate(&_obj, flags));
+	manager = std::shared_ptr<CUgraph>(new CUgraph(_obj), [] (CUgraph *ptr) { cuGraphDestroy(*ptr); delete ptr; }); // handle type must be CUgraph to match Wrapper<CUgraph>::manager and cuGraphDestroy
       }
 
       Graph(CUgraph &graph)
@@ -754,12 +648,6 @@ namespace cu {
       {
       }
 
-      ~Graph()
-      {
-	if (hasOwnership)
-	  checkCudaCall(cuGraphDestroy(_obj));
-      }
-
       ExecKernelNode addKernelNode(/* std::vector<GraphNode> dependencies, */ const KernelNodeParams &kernelArgs)
       {
 	ExecKernelNode node;
diff --git a/util/multi_array.h b/util/multi_array.h
index 1db0ea5a62be9a2acc5d66d8b5128a14641db4e8..651061d2f563c40f32c9f9a50d781e4d9ee86077 100644
--- a/util/multi_array.h
+++ b/util/multi_array.h
@@ -1,5 +1,5 @@
-#if !defined MULTI_ARRAY_H
-#define MULTI_ARRAY_H
+#if !defined TCC_MULTI_ARRAY_H
+#define TCC_MULTI_ARRAY_H
 
 // Experimental class that provides multi-dimensional arrays, similar to
 // boost::multi_array[_ref], but much simpler.