1#ifndef COLVARPROXY_GPU_H
2#define COLVARPROXY_GPU_H
4#include "colvar_gpu_support.h"
23#if defined (COLVARS_CUDA) || defined (COLVARS_HIP) || defined (COLVARS_SYCL)
47 return deallocate_host_T((
void **)pp);
59 return allocate_device_T((
void **)pp, len,
sizeof(T));
74 int error_code = COLVARS_OK;
76 error_code |= allocate_device_T((
void **)pp, len,
sizeof(T));
92 int error_code = COLVARS_OK;
106 template <
typename T>
108 return allocate_device_T_async((
void **)pp, len,
sizeof(T), stream);
117 template <
typename T>
119 return deallocate_device_T((
void **)pp);
129 template <
typename T>
131 return deallocate_device_T_async((
void **)pp, stream);
141 template <
typename T>
143 return clear_device_array_T(data, ndata,
sizeof(T));
154 template <
typename T>
156 return clear_device_array_T_async(data, ndata,
sizeof(T), stream);
/// Template function to copy data from host to device.
/// Typed wrapper: forwards both pointers and the length to copy_HtoD_T(),
/// adding sizeof(T) as the element size.
/// \param h_array Source array (host side, going by the parameter name)
/// \param d_array Destination array (device side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes, since sizeof(T) is passed separately
///        -- TODO confirm against copy_HtoD_T()
/// \return The int returned by copy_HtoD_T()
/// NOTE(review): the leading numbers (167, 168, 169) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
167 template <
typename T>
168 int copy_HtoD(
const T *h_array, T *d_array,
size_t array_len) {
169 return copy_HtoD_T(h_array, d_array, array_len,
sizeof(T));
/// Template function to copy data from host to device asynchronously.
/// Typed wrapper: forwards both pointers, the length and the stream to
/// copy_HtoD_T_async(), adding sizeof(T) as the element size.
/// \param h_array Source array (host side, going by the parameter name)
/// \param d_array Destination array (device side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes -- TODO confirm
/// \param stream Stream handed to copy_HtoD_T_async()
/// \return The int returned by copy_HtoD_T_async()
/// NOTE(review): the leading numbers (181, 182, 183) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
181 template <
typename T>
182 int copy_HtoD_async(
const T *h_array, T *d_array,
size_t array_len, cudaStream_t stream) {
183 return copy_HtoD_T_async(h_array, d_array, array_len,
sizeof(T), stream);
/// Template function to copy data from device to host.
/// Typed wrapper: forwards both pointers and the length to copy_DtoH_T(),
/// adding sizeof(T) as the element size.
/// \param d_array Source array (device side, going by the parameter name)
/// \param h_array Destination array (host side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes -- TODO confirm
/// \return The int returned by copy_DtoH_T()
/// NOTE(review): the leading numbers (194, 195, 196) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
194 template <
typename T>
195 int copy_DtoH(
const T *d_array, T *h_array,
size_t array_len) {
196 return copy_DtoH_T(d_array, h_array, array_len,
sizeof(T));
/// Template function to copy data from device to host asynchronously.
/// Typed wrapper: forwards both pointers, the length and the stream to
/// copy_DtoH_T_async(), adding sizeof(T) as the element size.
/// \param d_array Source array (device side, going by the parameter name)
/// \param h_array Destination array (host side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes -- TODO confirm
/// \param stream Stream handed to copy_DtoH_T_async()
/// \return The int returned by copy_DtoH_T_async()
/// NOTE(review): the leading numbers (208, 209, 210) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
208 template <
typename T>
209 int copy_DtoH_async(
const T *d_array, T *h_array,
size_t array_len, cudaStream_t stream) {
210 return copy_DtoH_T_async(d_array, h_array, array_len,
sizeof(T), stream);
/// Template function to copy data from device to device.
/// Typed wrapper: forwards both pointers and the length to copy_DtoD_T(),
/// adding sizeof(T) as the element size.
/// \param d_src Source array (device side, going by the parameter name)
/// \param d_dst Destination array (device side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes -- TODO confirm
/// \return The int returned by copy_DtoD_T()
/// NOTE(review): the leading numbers (221, 222, 223) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
221 template <
typename T>
222 int copy_DtoD(
const T *d_src, T *d_dst,
size_t array_len) {
223 return copy_DtoD_T(d_src, d_dst, array_len,
sizeof(T));
/// Template function to copy data from device to device asynchronously.
/// Typed wrapper: forwards both pointers, the length and the stream to
/// copy_DtoD_T_async(), adding sizeof(T) as the element size.
/// \param d_src Source array (device side, going by the parameter name)
/// \param d_dst Destination array (device side, going by the parameter name)
/// \param array_len Passed through unchanged; presumably a count of T
///        elements rather than bytes -- TODO confirm
/// \param stream Stream handed to copy_DtoD_T_async()
/// \return The int returned by copy_DtoD_T_async()
/// NOTE(review): the leading numbers (235, 236, 237) look like listing line
/// numbers fused in by text extraction, and the closing brace of this
/// function is not visible here -- confirm against the real header.
235 template <
typename T>
236 int copy_DtoD_async(
const T *d_src, T *d_dst,
size_t array_len, cudaStream_t stream) {
237 return copy_DtoD_T_async(d_src, d_dst, array_len,
sizeof(T), stream);
/// Memory management and data transfer implementations.
///
/// Untyped (void-pointer) virtual counterparts of the typed template
/// wrappers in this header. The wrappers visible in this file cast their
/// T** / T* arguments to void** / void* and pass sizeof(T) as sizeofT.
/// Only declarations are shown here; what each function actually does with
/// its arguments lives in the implementation file and is not visible, so
/// the per-function notes below are inferred from names and call sites.
/// Presumably len / ndata / array_len are element counts (not byte counts),
/// since the element size travels separately in sizeofT -- TODO confirm.
/// NOTE(review): the leading numbers (241 ... 254) look like listing line
/// numbers fused in by text extraction -- confirm against the real header.

/// Host allocation backend; presumably sets *pp to the new buffer.
241 virtual int allocate_host_T(
void **pp,
const size_t len,
const size_t sizeofT);
/// Host deallocation backend (called as deallocate_host_T((void **)pp)).
242 virtual int deallocate_host_T(
void **pp);
/// Device allocation backend (called with (void **)pp, len, sizeof(T)).
243 virtual int allocate_device_T(
void **pp,
const size_t len,
const size_t sizeofT);
/// Device deallocation backend (called as deallocate_device_T((void **)pp)).
244 virtual int deallocate_device_T(
void **pp);
/// Backend for clearing a device array (called with data, ndata, sizeof(T)).
245 virtual int clear_device_array_T(
void *data,
const size_t ndata,
const size_t sizeofT);
/// Stream-taking variant of allocate_device_T().
246 virtual int allocate_device_T_async(
void **pp,
const size_t len,
const size_t sizeofT, cudaStream_t stream);
/// Stream-taking variant of deallocate_device_T().
247 virtual int deallocate_device_T_async(
void **pp, cudaStream_t stream);
/// Stream-taking variant of clear_device_array_T().
248 virtual int clear_device_array_T_async(
void *data,
const size_t ndata,
const size_t sizeofT, cudaStream_t stream);
/// Host-to-device copy backend used by copy_HtoD<T>().
249 virtual int copy_HtoD_T(
const void *h_array,
void *d_array,
size_t array_len,
const size_t sizeofT);
/// Host-to-device copy backend used by copy_HtoD_async<T>().
250 virtual int copy_HtoD_T_async(
const void *h_array,
void *d_array,
size_t array_len,
const size_t sizeofT, cudaStream_t stream);
/// Device-to-host copy backend used by copy_DtoH<T>().
251 virtual int copy_DtoH_T(
const void *d_array,
void *h_array,
size_t array_len,
const size_t sizeofT);
/// Device-to-host copy backend used by copy_DtoH_async<T>().
252 virtual int copy_DtoH_T_async(
const void *d_array,
void *h_array,
size_t array_len,
const size_t sizeofT, cudaStream_t stream);
/// Device-to-device copy backend used by copy_DtoD<T>().
253 virtual int copy_DtoD_T(
const void *d_src,
void *d_dst,
size_t array_len,
const size_t sizeofT);
/// Device-to-device copy backend used by copy_DtoD_async<T>().
254 virtual int copy_DtoD_T_async(
const void *d_src,
void *d_dst,
size_t array_len,
const size_t sizeofT, cudaStream_t stream);
/// Functions to get device pointers for atom properties.
/// Each default implementation below returns nullptr; these functions are
/// meant to be overridden in derived classes.
/// NOTE(review): which array each getter exposes, and its layout, is
/// inferred from the function name only -- nothing in this file shows it.
/// NOTE(review): the leading numbers (260 ... 265) look like listing line
/// numbers fused in by text extraction -- confirm against the real header.

/// Atom charges as float; default: nullptr.
260 virtual float* proxy_atoms_charges_gpu_float() {
return nullptr;}
/// Atom masses as cvm::real; default: nullptr.
261 virtual cvm::real* proxy_atoms_masses_gpu() {
return nullptr;}
/// Atom charges as cvm::real; default: nullptr.
262 virtual cvm::real* proxy_atoms_charges_gpu() {
return nullptr;}
/// Atom positions as cvm::real; default: nullptr.
263 virtual cvm::real* proxy_atoms_positions_gpu() {
return nullptr;}
/// Total forces on atoms as cvm::real; default: nullptr.
264 virtual cvm::real* proxy_atoms_total_forces_gpu() {
return nullptr;}
/// New colvar forces on atoms as cvm::real; default: nullptr.
265 virtual cvm::real* proxy_atoms_new_colvar_forces_gpu() {
return nullptr;}
/// Virtual destructor (declaration only; the definition is not visible here).
/// NOTE(review): the leading "277" looks like a listing line number fused
/// in by text extraction -- confirm against the real header.
277 virtual ~colvarproxy_gpu();
double real
Defining an abstract real number type allows switching precision.
Definition: colvarmodule.h:98
Class for managing GPU memory allocation and data transfer.
Definition: colvarproxy_gpu.h:15
int reallocate_device(T **pp, const size_t len)
Template function to reallocate device memory.
Definition: colvarproxy_gpu.h:73
int copy_HtoD(const T *h_array, T *d_array, size_t array_len)
Template function to copy data from host to device.
Definition: colvarproxy_gpu.h:168
int deallocate_device_async(T **pp, cudaStream_t stream)
Template function to deallocate device memory asynchronously.
Definition: colvarproxy_gpu.h:130
int copy_DtoH(const T *d_array, T *h_array, size_t array_len)
Template function to copy data from device to host.
Definition: colvarproxy_gpu.h:195
int reallocate_host(T **pp, const size_t len)
Template function to reallocate host-pinned memory.
Definition: colvarproxy_gpu.h:91
virtual int allocate_host_T(void **pp, const size_t len, const size_t sizeofT)
Memory management and data transfer implementations.
Definition: colvarproxy_gpu.cpp:7
int copy_DtoD(const T *d_src, T *d_dst, size_t array_len)
Template function to copy data from device to device.
Definition: colvarproxy_gpu.h:222
int deallocate_device(T **pp)
Template function to deallocate device memory.
Definition: colvarproxy_gpu.h:118
virtual float * proxy_atoms_masses_gpu_float()
Functions to get device pointers for atom properties. These functions should be overridden in derived p...
Definition: colvarproxy_gpu.h:259
int copy_DtoH_async(const T *d_array, T *h_array, size_t array_len, cudaStream_t stream)
Template function to copy data from device to host asynchronously.
Definition: colvarproxy_gpu.h:209
virtual cudaStream_t get_default_stream()
Get the default CUDA stream from the proxy.
Definition: colvarproxy_gpu.h:25
int clear_device_array_async(T *data, const size_t ndata, cudaStream_t stream)
Template function to clear a device array to zero asynchronously.
Definition: colvarproxy_gpu.h:155
int copy_DtoD_async(const T *d_src, T *d_dst, size_t array_len, cudaStream_t stream)
Template function to copy data from device to device asynchronously.
Definition: colvarproxy_gpu.h:236
int allocate_device(T **pp, const size_t len)
Template function to allocate device memory.
Definition: colvarproxy_gpu.h:58
int allocate_host(T **pp, const size_t len)
Template function to allocate host-pinned memory.
Definition: colvarproxy_gpu.h:35
bool has_gpu_support() const
Whether the proxy supports GPU.
Definition: colvarproxy_gpu.h:20
int deallocate_host(T **pp)
Template function to deallocate host-pinned memory.
Definition: colvarproxy_gpu.h:46
int clear_device_array(T *data, const size_t ndata)
Template function to clear a device array to zero.
Definition: colvarproxy_gpu.h:142
bool support_gpu
Whether the proxy supports GPU.
Definition: colvarproxy_gpu.h:280
virtual int wait_for_extra_info_ready()
This function will be called after atom groups are calculated on GPU.
Definition: colvarproxy_gpu.cpp:100
colvarproxy_gpu()
Constructor.
Definition: colvarproxy_gpu.h:18
int allocate_device_async(T **pp, const size_t len, cudaStream_t stream)
Template function to allocate device memory asynchronously.
Definition: colvarproxy_gpu.h:107
int copy_HtoD_async(const T *h_array, T *d_array, size_t array_len, cudaStream_t stream)
Template function to copy data from host to device asynchronously.
Definition: colvarproxy_gpu.h:182
Collective variables main module.