|
Collective Variables Module - Developer Documentation
|
Class for managing GPU memory allocation and data transfer. More...
#include <colvarproxy_gpu.h>

Public Member Functions | |
| colvarproxy_gpu () | |
| Constructor. | |
| bool | has_gpu_support () const |
| Whether the proxy supports GPU. | |
| virtual cudaStream_t | get_default_stream () |
| Get the default CUDA stream from the proxy. | |
| template<typename T > | |
| int | allocate_host (T **pp, const size_t len) |
| Template function to allocate host-pinned memory. More... | |
| template<typename T > | |
| int | deallocate_host (T **pp) |
| Template function to deallocate host-pinned memory. More... | |
| template<typename T > | |
| int | allocate_device (T **pp, const size_t len) |
| Template function to allocate device memory. More... | |
| template<typename T > | |
| int | reallocate_device (T **pp, const size_t len) |
| Template function to reallocate device memory. More... | |
| template<typename T > | |
| int | reallocate_host (T **pp, const size_t len) |
| Template function to reallocate host-pinned memory. More... | |
| template<typename T > | |
| int | allocate_device_async (T **pp, const size_t len, cudaStream_t stream) |
| Template function to allocate device memory asynchronously. More... | |
| template<typename T > | |
| int | deallocate_device (T **pp) |
| Template function to deallocate device memory. More... | |
| template<typename T > | |
| int | deallocate_device_async (T **pp, cudaStream_t stream) |
| Template function to deallocate device memory asynchronously. More... | |
| template<typename T > | |
| int | clear_device_array (T *data, const size_t ndata) |
| Template function to clear a device array to zero. More... | |
| template<typename T > | |
| int | clear_device_array_async (T *data, const size_t ndata, cudaStream_t stream) |
| Template function to clear a device array to zero asynchronously. More... | |
| template<typename T > | |
| int | copy_HtoD (const T *h_array, T *d_array, size_t array_len) |
| Template function to copy data from host to device. More... | |
| template<typename T > | |
| int | copy_HtoD_async (const T *h_array, T *d_array, size_t array_len, cudaStream_t stream) |
| Template function to copy data from host to device asynchronously. More... | |
| template<typename T > | |
| int | copy_DtoH (const T *d_array, T *h_array, size_t array_len) |
| Template function to copy data from device to host. More... | |
| template<typename T > | |
| int | copy_DtoH_async (const T *d_array, T *h_array, size_t array_len, cudaStream_t stream) |
| Template function to copy data from device to host asynchronously. More... | |
| template<typename T > | |
| int | copy_DtoD (const T *d_src, T *d_dst, size_t array_len) |
| Template function to copy data from device to device. More... | |
| template<typename T > | |
| int | copy_DtoD_async (const T *d_src, T *d_dst, size_t array_len, cudaStream_t stream) |
| Template function to copy data from device to device asynchronously. More... | |
| virtual int | wait_for_extra_info_ready () |
| This function will be called after atom groups are calculated on GPU. More... | |
| virtual | ~colvarproxy_gpu () |
| Destructor. | |
| virtual int | allocate_host_T (void **pp, const size_t len, const size_t sizeofT) |
| Memory management and data transfer implementations. | |
| virtual int | deallocate_host_T (void **pp) |
| virtual int | allocate_device_T (void **pp, const size_t len, const size_t sizeofT) |
| virtual int | deallocate_device_T (void **pp) |
| virtual int | clear_device_array_T (void *data, const size_t ndata, const size_t sizeofT) |
| virtual int | allocate_device_T_async (void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream) |
| virtual int | deallocate_device_T_async (void **pp, cudaStream_t stream) |
| virtual int | clear_device_array_T_async (void *data, const size_t ndata, const size_t sizeofT, cudaStream_t stream) |
| virtual int | copy_HtoD_T (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT) |
| virtual int | copy_HtoD_T_async (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT, cudaStream_t stream) |
| virtual int | copy_DtoH_T (const void *d_array, void *h_array, size_t array_len, const size_t sizeofT) |
| virtual int | copy_DtoH_T_async (const void *d_array, void *h_array, size_t array_len, const size_t sizeofT, cudaStream_t stream) |
| virtual int | copy_DtoD_T (const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT) |
| virtual int | copy_DtoD_T_async (const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT, cudaStream_t stream) |
| virtual float * | proxy_atoms_masses_gpu_float () |
| Functions to get device pointers for atom properties. These functions should be overridden in derived proxy classes that manage actual GPU memory. | |
| virtual float * | proxy_atoms_charges_gpu_float () |
| virtual cvm::real * | proxy_atoms_masses_gpu () |
| virtual cvm::real * | proxy_atoms_charges_gpu () |
| virtual cvm::real * | proxy_atoms_positions_gpu () |
| virtual cvm::real * | proxy_atoms_total_forces_gpu () |
| virtual cvm::real * | proxy_atoms_new_colvar_forces_gpu () |
Protected Attributes | |
| bool | support_gpu |
| Whether the proxy supports GPU. | |
Class for managing GPU memory allocation and data transfer.
|
inline |
Template function to allocate device memory.
| T | The type of elements to allocate |
| [out] | pp | Pointer to the pointer that will hold the allocated device memory |
| [in] | len | Number of elements to allocate |
|
inline |
Template function to allocate device memory asynchronously.
| T | The type of elements to allocate |
| [out] | pp | Pointer to the pointer that will hold the allocated device memory |
| [in] | len | Number of elements to allocate |
| [in] | stream | The CUDA stream to use for the allocation |
|
inline |
Template function to allocate host-pinned memory.
| T | The type of elements to allocate |
| [out] | pp | Pointer to the pointer that will hold the allocated host-pinned memory |
| [in] | len | Number of elements to allocate |
|
inline |
Template function to clear a device array to zero.
| T | The type of elements in the array |
| [in] | data | Pointer to the device array to clear |
| [in] | ndata | Number of elements in the array |
|
inline |
Template function to clear a device array to zero asynchronously.
| T | The type of elements in the array |
| [in] | data | Pointer to the device array to clear |
| [in] | ndata | Number of elements in the array |
| [in] | stream | The CUDA stream to use for the operation |
|
inline |
Template function to copy data from device to device.
| T | The type of elements to copy |
| [in] | d_src | Pointer to the source device array |
| [out] | d_dst | Pointer to the destination device array |
| [in] | array_len | Number of elements to copy |
|
inline |
Template function to copy data from device to device asynchronously.
| T | The type of elements to copy |
| [in] | d_src | Pointer to the source device array |
| [out] | d_dst | Pointer to the destination device array |
| [in] | array_len | Number of elements to copy |
| [in] | stream | The CUDA stream to use for the operation |
|
inline |
Template function to copy data from device to host.
| T | The type of elements to copy |
| [in] | d_array | Pointer to the device array |
| [out] | h_array | Pointer to the host array |
| [in] | array_len | Number of elements to copy |
|
inline |
Template function to copy data from device to host asynchronously.
| T | The type of elements to copy |
| [in] | d_array | Pointer to the device array |
| [out] | h_array | Pointer to the host array |
| [in] | array_len | Number of elements to copy |
| [in] | stream | The CUDA stream to use for the operation |
|
inline |
Template function to copy data from host to device.
| T | The type of elements to copy |
| [in] | h_array | Pointer to the host array |
| [out] | d_array | Pointer to the device array |
| [in] | array_len | Number of elements to copy |
|
inline |
Template function to copy data from host to device asynchronously.
| T | The type of elements to copy |
| [in] | h_array | Pointer to the host array |
| [out] | d_array | Pointer to the device array |
| [in] | array_len | Number of elements to copy |
| [in] | stream | The CUDA stream to use for the operation |
|
inline |
Template function to deallocate device memory.
| T | The type of elements to deallocate |
| [in,out] | pp | Pointer to the pointer that holds the allocated device memory |
|
inline |
Template function to deallocate device memory asynchronously.
| T | The type of elements to deallocate |
| [in,out] | pp | Pointer to the pointer that holds the allocated device memory |
| [in] | stream | The CUDA stream to use for the deallocation |
|
inline |
Template function to deallocate host-pinned memory.
| T | The type of elements to deallocate |
| [in,out] | pp | Pointer to the pointer that holds the allocated host-pinned memory |
|
inline |
Template function to reallocate device memory.
This function first deallocates any existing memory pointed to by *pp, then allocates new device memory for len elements of type T.
| T | The type of elements to allocate |
| [in,out] | pp | Pointer to the pointer to reallocate; any existing device memory it points to is deallocated first, and on return it holds the newly allocated device memory |
| [in] | len | Number of elements to allocate |
|
inline |
Template function to reallocate host-pinned memory.
This function first deallocates any existing memory pointed to by *pp, then allocates new host-pinned memory for len elements of type T.
| T | The type of elements to allocate |
| [in,out] | pp | Pointer to the pointer to reallocate; any existing host-pinned memory it points to is deallocated first, and on return it holds the newly allocated host-pinned memory |
| [in] | len | Number of elements to allocate |
|
virtual |
This function will be called after atom groups are calculated on GPU.
This function is useful when additional information needs to be transferred from the proxy. For example, the proxy can copy the lattice vectors in a separate stream, and this function can wait for that stream to complete.