Collective Variables Module - Developer Documentation
Loading...
Searching...
No Matches
colvarproxy_gpu.h
Go to the documentation of this file.
1#ifndef COLVARPROXY_GPU_H
2#define COLVARPROXY_GPU_H
3
4#include "colvar_gpu_support.h"
5#include "colvarmodule.h"
6
16public:
20 bool has_gpu_support() const {
21 return support_gpu;
22 }
23#if defined (COLVARS_CUDA) || defined (COLVARS_HIP) || defined (COLVARS_SYCL)
25 virtual cudaStream_t get_default_stream() {return (cudaStream_t)0;}
34 template <typename T>
35 int allocate_host(T **pp, const size_t len) {
36 return allocate_host_T((void **)pp, len, sizeof(T));
37 }
45 template <typename T>
46 int deallocate_host(T **pp) {
47 return deallocate_host_T((void **)pp);
48 }
57 template <typename T>
58 int allocate_device(T **pp, const size_t len) {
59 return allocate_device_T((void **)pp, len, sizeof(T));
60 }
72 template <typename T>
73 int reallocate_device(T **pp, const size_t len) {
74 int error_code = COLVARS_OK;
75 error_code |= deallocate_device(pp);
76 error_code |= allocate_device_T((void **)pp, len, sizeof(T));
77 return error_code;
78 }
90 template <typename T>
91 int reallocate_host(T **pp, const size_t len) {
92 int error_code = COLVARS_OK;
93 error_code |= deallocate_host(pp);
94 error_code |= allocate_host_T((void **)pp, len, sizeof(T));
95 return error_code;
96 }
106 template <typename T>
107 int allocate_device_async(T **pp, const size_t len, cudaStream_t stream) {
108 return allocate_device_T_async((void **)pp, len, sizeof(T), stream);
109 }
117 template <typename T>
118 int deallocate_device(T **pp) {
119 return deallocate_device_T((void **)pp);
120 }
129 template <typename T>
130 int deallocate_device_async(T **pp, cudaStream_t stream) {
131 return deallocate_device_T_async((void **)pp, stream);
132 }
141 template <typename T>
142 int clear_device_array(T *data, const size_t ndata) {
143 return clear_device_array_T(data, ndata, sizeof(T));
144 }
154 template <typename T>
155 int clear_device_array_async(T *data, const size_t ndata, cudaStream_t stream) {
156 return clear_device_array_T_async(data, ndata, sizeof(T), stream);
157 }
167 template <typename T>
168 int copy_HtoD(const T *h_array, T *d_array, size_t array_len) {
169 return copy_HtoD_T(h_array, d_array, array_len, sizeof(T));
170 }
181 template <typename T>
182 int copy_HtoD_async(const T *h_array, T *d_array, size_t array_len, cudaStream_t stream) {
183 return copy_HtoD_T_async(h_array, d_array, array_len, sizeof(T), stream);
184 }
194 template <typename T>
195 int copy_DtoH(const T *d_array, T *h_array, size_t array_len) {
196 return copy_DtoH_T(d_array, h_array, array_len, sizeof(T));
197 }
208 template <typename T>
209 int copy_DtoH_async(const T *d_array, T *h_array, size_t array_len, cudaStream_t stream) {
210 return copy_DtoH_T_async(d_array, h_array, array_len, sizeof(T), stream);
211 }
221 template <typename T>
222 int copy_DtoD(const T *d_src, T *d_dst, size_t array_len) {
223 return copy_DtoD_T(d_src, d_dst, array_len, sizeof(T));
224 }
235 template <typename T>
236 int copy_DtoD_async(const T *d_src, T *d_dst, size_t array_len, cudaStream_t stream) {
237 return copy_DtoD_T_async(d_src, d_dst, array_len, sizeof(T), stream);
238 }
  // Type-erased backends for the templated wrappers above. Sizes are passed
  // as an element count (len/ndata/array_len) plus the element size in bytes
  // (sizeofT); the wrappers supply sizeof(T). Derived proxies override these
  // to provide the actual allocation, clearing and copy implementations.
241 virtual int allocate_host_T(void **pp, const size_t len, const size_t sizeofT);
242 virtual int deallocate_host_T(void **pp);
243 virtual int allocate_device_T(void **pp, const size_t len, const size_t sizeofT);
244 virtual int deallocate_device_T(void **pp);
245 virtual int clear_device_array_T(void *data, const size_t ndata, const size_t sizeofT);
  // Stream-ordered variants of the above; operations are ordered on the
  // given stream instead of synchronizing with the whole device.
246 virtual int allocate_device_T_async(void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream);
247 virtual int deallocate_device_T_async(void **pp, cudaStream_t stream);
248 virtual int clear_device_array_T_async(void *data, const size_t ndata, const size_t sizeofT, cudaStream_t stream);
  // Host<->device and device<->device copies (synchronous and stream-ordered).
249 virtual int copy_HtoD_T(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT);
250 virtual int copy_HtoD_T_async(const void *h_array, void *d_array, size_t array_len, const size_t sizeofT, cudaStream_t stream);
251 virtual int copy_DtoH_T(const void *d_array, void *h_array, size_t array_len, const size_t sizeofT);
252 virtual int copy_DtoH_T_async(const void *d_array, void *h_array, size_t array_len, const size_t sizeofT, cudaStream_t stream);
253 virtual int copy_DtoD_T(const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT);
254 virtual int copy_DtoD_T_async(const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT, cudaStream_t stream);
259 virtual float* proxy_atoms_masses_gpu_float() {return nullptr;}
260 virtual float* proxy_atoms_charges_gpu_float() {return nullptr;}
261 virtual cvm::real* proxy_atoms_masses_gpu() {return nullptr;}
262 virtual cvm::real* proxy_atoms_charges_gpu() {return nullptr;}
263 virtual cvm::real* proxy_atoms_positions_gpu() {return nullptr;}
264 virtual cvm::real* proxy_atoms_total_forces_gpu() {return nullptr;}
265 virtual cvm::real* proxy_atoms_new_colvar_forces_gpu() {return nullptr;}
267
274 virtual int wait_for_extra_info_ready();
275#endif // defined (COLVARS_CUDA) || defined (COLVARS_HIP) || defined (COLVARS_SYCL)
277 virtual ~colvarproxy_gpu();
278protected:
281};
282
283#endif // COLVARPROXY_GPU_H
double real
Defining an abstract real number makes it possible to switch precision.
Definition: colvarmodule.h:98
Class for managing GPU memory allocation and data transfer.
Definition: colvarproxy_gpu.h:15
int reallocate_device(T **pp, const size_t len)
Template function to reallocate device memory.
Definition: colvarproxy_gpu.h:73
int copy_HtoD(const T *h_array, T *d_array, size_t array_len)
Template function to copy data from host to device.
Definition: colvarproxy_gpu.h:168
int deallocate_device_async(T **pp, cudaStream_t stream)
Template function to deallocate device memory asynchronously.
Definition: colvarproxy_gpu.h:130
int copy_DtoH(const T *d_array, T *h_array, size_t array_len)
Template function to copy data from device to host.
Definition: colvarproxy_gpu.h:195
int reallocate_host(T **pp, const size_t len)
Template function to reallocate host-pinned memory.
Definition: colvarproxy_gpu.h:91
virtual int allocate_host_T(void **pp, const size_t len, const size_t sizeofT)
Memory management and data transfer implementations.
Definition: colvarproxy_gpu.cpp:7
int copy_DtoD(const T *d_src, T *d_dst, size_t array_len)
Template function to copy data from device to device.
Definition: colvarproxy_gpu.h:222
int deallocate_device(T **pp)
Template function to deallocate device memory.
Definition: colvarproxy_gpu.h:118
virtual float * proxy_atoms_masses_gpu_float()
Functions to get device pointers for atom properties. These functions should be overridden in derived p...
Definition: colvarproxy_gpu.h:259
int copy_DtoH_async(const T *d_array, T *h_array, size_t array_len, cudaStream_t stream)
Template function to copy data from device to host asynchronously.
Definition: colvarproxy_gpu.h:209
virtual cudaStream_t get_default_stream()
Get the default CUDA stream from the proxy.
Definition: colvarproxy_gpu.h:25
int clear_device_array_async(T *data, const size_t ndata, cudaStream_t stream)
Template function to clear a device array to zero asynchronously.
Definition: colvarproxy_gpu.h:155
int copy_DtoD_async(const T *d_src, T *d_dst, size_t array_len, cudaStream_t stream)
Template function to copy data from device to device asynchronously.
Definition: colvarproxy_gpu.h:236
int allocate_device(T **pp, const size_t len)
Template function to allocate device memory.
Definition: colvarproxy_gpu.h:58
int allocate_host(T **pp, const size_t len)
Template function to allocate host-pinned memory.
Definition: colvarproxy_gpu.h:35
bool has_gpu_support() const
Whether the proxy supports GPU.
Definition: colvarproxy_gpu.h:20
int deallocate_host(T **pp)
Template function to deallocate host-pinned memory.
Definition: colvarproxy_gpu.h:46
int clear_device_array(T *data, const size_t ndata)
Template function to clear a device array to zero.
Definition: colvarproxy_gpu.h:142
bool support_gpu
Whether the proxy supports GPU.
Definition: colvarproxy_gpu.h:280
virtual int wait_for_extra_info_ready()
This function will be called after atom groups are calculated on GPU.
Definition: colvarproxy_gpu.cpp:100
colvarproxy_gpu()
Constructor.
Definition: colvarproxy_gpu.h:18
int allocate_device_async(T **pp, const size_t len, cudaStream_t stream)
Template function to allocate device memory asynchronously.
Definition: colvarproxy_gpu.h:107
int copy_HtoD_async(const T *h_array, T *d_array, size_t array_len, cudaStream_t stream)
Template function to copy data from host to device asynchronously.
Definition: colvarproxy_gpu.h:182
Collective variables main module.