// LAMA
00001 00033 #ifndef LAMA_CUDA_CONTEXT_HPP_ 00034 #define LAMA_CUDA_CONTEXT_HPP_ 00035 00036 // for dll_import 00037 #include <lama/config.hpp> 00038 00039 // base classes 00040 #include <lama/Context.hpp> 00041 #include <boost/enable_shared_from_this.hpp> 00042 00043 // others 00044 #include <lama/task/Thread.hpp> 00045 00046 #include <cuda.h> 00047 #include <cuda_runtime.h> 00048 #include <cusparse.h> 00049 00050 // boost 00051 #include <boost/weak_ptr.hpp> 00052 #include <boost/version.hpp> 00053 00054 #include <string> 00055 00056 namespace lama 00057 { 00058 00059 class LAMA_DLL_IMPORTEXPORT CUDAStreamSyncToken; 00060 00066 #define LAMA_CUDA_ENV_FOR_DEVICE "LAMA_DEVICE" 00067 00076 class LAMA_DLL_IMPORTEXPORT CUDAContext : public Context, public boost::enable_shared_from_this<CUDAContext> 00077 { 00078 00079 friend class CUDAContextManager; 00080 00081 public: 00082 00087 virtual ~CUDAContext(); 00088 00089 int getDeviceNr() const { return mDeviceNr; } 00090 00093 virtual bool canUseData(const Context& other) const; 00094 00095 virtual void writeAt( std::ostream& stream ) const; 00096 00097 virtual void* allocate( const size_t size ) const; 00098 00099 virtual void allocate( ContextData& contextData, const size_t size ) const; 00100 00101 virtual void free( void* pointer, const size_t size ) const; 00102 00103 virtual void free( ContextData& contextData ) const; 00104 00105 virtual void memcpy( void* dst, const void* src, const size_t size ) const; 00106 00107 virtual std::auto_ptr<SyncToken> memcpyAsync( void* dst, const void* src, const size_t size ) const; 00108 00109 virtual bool cancpy( const ContextData& dst, const ContextData& src ) const; 00110 00111 virtual void memcpy( ContextData& dst, const ContextData& src, const size_t size ) const; 00112 00113 virtual std::auto_ptr<SyncToken> memcpyAsync( ContextData& dst, const ContextData& src, const size_t size ) const; 00114 00121 virtual void enable( const char* filename, int line ) const; 00122 00123 
virtual void disable( const char* filename, int line ) const; 00124 00125 std::auto_ptr<CUDAStreamSyncToken> getComputeSyncToken() const; 00126 std::auto_ptr<CUDAStreamSyncToken> getTransferSyncToken() const; 00127 00130 virtual std::auto_ptr<SyncToken> getSyncToken() const; 00131 00132 protected: 00133 00144 CUDAContext( int device ); 00145 00146 private: 00147 00148 void memcpyFromHost( void* dst, const void* src, const size_t size ) const; 00149 void memcpyToHost( void* dst, const void* src, const size_t size ) const; 00150 void memcpyFromCUDAHost( void* dst, const void* src, const size_t size ) const; 00151 void memcpyToCUDAHost( void* dst, const void* src, const size_t size ) const; 00152 00153 std::auto_ptr<SyncToken> memcpyAsyncFromHost( void* dst, const void* src, const size_t size ) const; 00154 std::auto_ptr<SyncToken> memcpyAsyncToHost( void* dst, const void* src, const size_t size ) const; 00155 std::auto_ptr<SyncToken> memcpyAsyncFromCUDAHost( void* dst, const void* src, const size_t size ) const; 00156 std::auto_ptr<SyncToken> memcpyAsyncToCUDAHost( void* dst, const void* src, const size_t size ) const; 00157 00158 int mDeviceNr; 00159 00160 CUdevice mCUdevice; 00161 00162 CUcontext mCUcontext; 00163 00164 CUstream mTransferStream; 00165 CUstream mComputeStream; 00166 00167 cusparseHandle_t mCusparseHandle; 00168 00169 std::string mDeviceName; 00170 00171 Thread::Id mOwnerThread; 00172 00173 mutable int mNumberOfAllocates; 00174 mutable long long mNumberOfAllocatedBytes; 00175 mutable long long mMaxNumberOfAllocatedBytes; 00176 00177 static int currentDeviceNr; 00178 00179 static int numUsedDevices; 00180 00181 static size_t minPinnedSize; 00182 00183 LAMA_LOG_DECL_STATIC_LOGGER( logger ); 00184 }; 00185 00186 } //namespace lama 00187 00188 #endif // LAMA_CUDA_CONTEXT_HPP_