Class for managing GPU memory allocation and data transfer. More...

#include <colvarproxy_gpu.h>

Inheritance diagram for colvarproxy_gpu:

[legend]

Public Member Functions
	colvarproxy_gpu ()
	Constructor.

bool	has_gpu_support () const
	Whether the proxy supports GPU.

virtual cudaStream_t	get_default_stream ()
	Get the default CUDA stream from the proxy.

template<typename T >
int	allocate_host (T **pp, const size_t len)
	Template function to allocate host-pinned memory. More...

template<typename T >
int	deallocate_host (T **pp)
	Template function to deallocate host-pinned memory. More...

template<typename T >
int	allocate_device (T **pp, const size_t len)
	Template function to allocate device memory. More...

template<typename T >
int	reallocate_device (T **pp, const size_t len)
	Template function to reallocate device memory. More...

template<typename T >
int	reallocate_host (T **pp, const size_t len)
	Template function to reallocate host-pinned memory. More...

template<typename T >
int	allocate_device_async (T **pp, const size_t len, cudaStream_t stream)
	Template function to allocate device memory asynchronously. More...

template<typename T >
int	deallocate_device (T **pp)
	Template function to deallocate device memory. More...

template<typename T >
int	deallocate_device_async (T **pp, cudaStream_t stream)
	Template function to deallocate device memory asynchronously. More...

template<typename T >
int	clear_device_array (T *data, const size_t ndata)
	Template function to clear a device array to zero. More...

template<typename T >
int	clear_device_array_async (T *data, const size_t ndata, cudaStream_t stream)
	Template function to clear a device array to zero asynchronously. More...

template<typename T >
int	copy_HtoD (const T *h_array, T *d_array, size_t array_len)
	Template function to copy data from host to device. More...

template<typename T >
int	copy_HtoD_async (const T *h_array, T *d_array, size_t array_len, cudaStream_t stream)
	Template function to copy data from host to device asynchronously. More...

template<typename T >
int	copy_DtoH (const T *d_array, T *h_array, size_t array_len)
	Template function to copy data from device to host. More...

template<typename T >
int	copy_DtoH_async (const T *d_array, T *h_array, size_t array_len, cudaStream_t stream)
	Template function to copy data from device to host asynchronously. More...

template<typename T >
int	copy_DtoD (const T *d_src, T *d_dst, size_t array_len)
	Template function to copy data from device to device. More...

template<typename T >
int	copy_DtoD_async (const T *d_src, T *d_dst, size_t array_len, cudaStream_t stream)
	Template function to copy data from device to device asynchronously. More...

virtual int	wait_for_extra_info_ready ()
	This function will be called after atom groups are calculated on GPU. More...

virtual	~colvarproxy_gpu ()
	Destructor.


virtual int	allocate_host_T (void **pp, const size_t len, const size_t sizeofT)
	Memory management and data transfer implementations.

virtual int	deallocate_host_T (void **pp)

virtual int	allocate_device_T (void **pp, const size_t len, const size_t sizeofT)

virtual int	deallocate_device_T (void **pp)

virtual int	clear_device_array_T (void *data, const size_t ndata, const size_t sizeofT)

virtual int	allocate_device_T_async (void **pp, const size_t len, const size_t sizeofT, cudaStream_t stream)

virtual int	deallocate_device_T_async (void **pp, cudaStream_t stream)

virtual int	clear_device_array_T_async (void *data, const size_t ndata, const size_t sizeofT, cudaStream_t stream)

virtual int	copy_HtoD_T (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT)

virtual int	copy_HtoD_T_async (const void *h_array, void *d_array, size_t array_len, const size_t sizeofT, cudaStream_t stream)

virtual int	copy_DtoH_T (const void *d_array, void *h_array, size_t array_len, const size_t sizeofT)

virtual int	copy_DtoH_T_async (const void *d_array, void *h_array, size_t array_len, const size_t sizeofT, cudaStream_t stream)

virtual int	copy_DtoD_T (const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT)

virtual int	copy_DtoD_T_async (const void *d_src, void *d_dst, size_t array_len, const size_t sizeofT, cudaStream_t stream)


virtual float *	proxy_atoms_masses_gpu_float ()
	Functions to get device pointers for atom properties. These functions should be overridden in derived proxy classes that manage actual GPU memory.

virtual float *	proxy_atoms_charges_gpu_float ()

virtual cvm::real *	proxy_atoms_masses_gpu ()

virtual cvm::real *	proxy_atoms_charges_gpu ()

virtual cvm::real *	proxy_atoms_positions_gpu ()

virtual cvm::real *	proxy_atoms_total_forces_gpu ()

virtual cvm::real *	proxy_atoms_new_colvar_forces_gpu ()

Protected Attributes
bool	support_gpu
	Whether the proxy supports GPU.

Detailed Description

Class for managing GPU memory allocation and data transfer.

Member Function Documentation

◆ allocate_device()

template<typename T >

int colvarproxy_gpu::allocate_device	(	T **	pp,
		const size_t	len
	)

inline

Template function to allocate device memory.

Template Parameters

T	The type of elements to allocate

Parameters

[out]	pp	Pointer to the pointer that will hold the allocated device memory
[in]	len	Number of elements to allocate

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ allocate_device_async()

template<typename T >

int colvarproxy_gpu::allocate_device_async	(	T **	pp,
		const size_t	len,
		cudaStream_t	stream
	)

inline

Template function to allocate device memory asynchronously.

Template Parameters

T	The type of elements to allocate

Parameters

[out]	pp	Pointer to the pointer that will hold the allocated device memory
[in]	len	Number of elements to allocate
[in]	stream	The CUDA stream to use for the allocation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ allocate_host()

template<typename T >

int colvarproxy_gpu::allocate_host	(	T **	pp,
		const size_t	len
	)

inline

Template function to allocate host-pinned memory.

Template Parameters

T	The type of elements to allocate

Parameters

[out]	pp	Pointer to the pointer that will hold the allocated host-pinned memory
[in]	len	Number of elements to allocate

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ clear_device_array()

template<typename T >

int colvarproxy_gpu::clear_device_array	(	T *	data,
		const size_t	ndata
	)

inline

Template function to clear a device array to zero.

Template Parameters

T	The type of elements in the array

Parameters

[in]	data	Pointer to the device array to clear
[in]	ndata	Number of elements in the array

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ clear_device_array_async()

template<typename T >

int colvarproxy_gpu::clear_device_array_async	(	T *	data,
		const size_t	ndata,
		cudaStream_t	stream
	)

inline

Template function to clear a device array to zero asynchronously.

Template Parameters

T	The type of elements in the array

Parameters

[in]	data	Pointer to the device array to clear
[in]	ndata	Number of elements in the array
[in]	stream	The CUDA stream to use for the operation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_DtoD()

template<typename T >

int colvarproxy_gpu::copy_DtoD	(	const T *	d_src,
		T *	d_dst,
		size_t	array_len
	)

inline

Template function to copy data from device to device.

Template Parameters

T	The type of elements to copy

Parameters

[in]	d_src	Pointer to the source device array
[out]	d_dst	Pointer to the destination device array
[in]	array_len	Number of elements to copy

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_DtoD_async()

template<typename T >

int colvarproxy_gpu::copy_DtoD_async	(	const T *	d_src,
		T *	d_dst,
		size_t	array_len,
		cudaStream_t	stream
	)

inline

Template function to copy data from device to device asynchronously.

Template Parameters

T	The type of elements to copy

Parameters

[in]	d_src	Pointer to the source device array
[out]	d_dst	Pointer to the destination device array
[in]	array_len	Number of elements to copy
[in]	stream	The CUDA stream to use for the operation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_DtoH()

template<typename T >

int colvarproxy_gpu::copy_DtoH	(	const T *	d_array,
		T *	h_array,
		size_t	array_len
	)

inline

Template function to copy data from device to host.

Template Parameters

T	The type of elements to copy

Parameters

[in]	d_array	Pointer to the device array
[out]	h_array	Pointer to the host array
[in]	array_len	Number of elements to copy

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_DtoH_async()

template<typename T >

int colvarproxy_gpu::copy_DtoH_async	(	const T *	d_array,
		T *	h_array,
		size_t	array_len,
		cudaStream_t	stream
	)

inline

Template function to copy data from device to host asynchronously.

Template Parameters

T	The type of elements to copy

Parameters

[in]	d_array	Pointer to the device array
[out]	h_array	Pointer to the host array
[in]	array_len	Number of elements to copy
[in]	stream	The CUDA stream to use for the operation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_HtoD()

template<typename T >

int colvarproxy_gpu::copy_HtoD	(	const T *	h_array,
		T *	d_array,
		size_t	array_len
	)

inline

Template function to copy data from host to device.

Template Parameters

T	The type of elements to copy

Parameters

[in]	h_array	Pointer to the host array
[out]	d_array	Pointer to the device array
[in]	array_len	Number of elements to copy

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ copy_HtoD_async()

template<typename T >

int colvarproxy_gpu::copy_HtoD_async	(	const T *	h_array,
		T *	d_array,
		size_t	array_len,
		cudaStream_t	stream
	)

inline

Template function to copy data from host to device asynchronously.

Template Parameters

T	The type of elements to copy

Parameters

[in]	h_array	Pointer to the host array
[out]	d_array	Pointer to the device array
[in]	array_len	Number of elements to copy
[in]	stream	The CUDA stream to use for the operation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ deallocate_device()

template<typename T >

int colvarproxy_gpu::deallocate_device ( T ** pp )

inline

Template function to deallocate device memory.

Template Parameters

T	The type of elements to deallocate

Parameters

[in,out] pp Pointer to the pointer that holds the allocated device memory

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ deallocate_device_async()

template<typename T >

int colvarproxy_gpu::deallocate_device_async	(	T **	pp,
		cudaStream_t	stream
	)

inline

Template function to deallocate device memory asynchronously.

Template Parameters

T	The type of elements to deallocate

Parameters

[in,out]	pp	Pointer to the pointer that holds the allocated device memory
[in]	stream	The CUDA stream to use for the deallocation

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ deallocate_host()

template<typename T >

int colvarproxy_gpu::deallocate_host ( T ** pp )

inline

Template function to deallocate host-pinned memory.

Template Parameters

T	The type of elements to deallocate

Parameters

[in,out] pp Pointer to the pointer that holds the allocated host-pinned memory

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ reallocate_device()

template<typename T >

int colvarproxy_gpu::reallocate_device	(	T **	pp,
		const size_t	len
	)

inline

Template function to reallocate device memory.

This function first deallocates any existing memory pointed to by *pp, then allocates new device memory for len elements of type T.

Template Parameters

T	The type of elements to allocate

Parameters

[in,out]	pp	Pointer to the pointer that will hold the allocated device memory
[in]	len	Number of elements to allocate

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ reallocate_host()

template<typename T >

int colvarproxy_gpu::reallocate_host	(	T **	pp,
		const size_t	len
	)

inline

Template function to reallocate host-pinned memory.

This function first deallocates any existing memory pointed to by *pp, then allocates new host-pinned memory for len elements of type T.

Template Parameters

T	The type of elements to allocate

Parameters

[in,out]	pp	Pointer to the pointer that will hold the allocated host-pinned memory
[in]	len	Number of elements to allocate

Returns: COLVARS_OK if successful, otherwise COLVARS_ERROR

◆ wait_for_extra_info_ready()

int colvarproxy_gpu::wait_for_extra_info_ready ( )

virtual

This function will be called after atom groups are calculated on GPU.

This function is useful when additional information needs to be transferred from the proxy. For example, the proxy can copy the lattice vectors in a separate stream, and this function can wait for that stream to complete.

The documentation for this class was generated from the following files:

colvarproxy_gpu.h
colvarproxy_gpu.cpp

Public Member Functions

Protected Attributes

Detailed Description

Member Function Documentation

◆ allocate_device()

◆ allocate_device_async()

◆ allocate_host()

◆ clear_device_array()

◆ clear_device_array_async()

◆ copy_DtoD()

◆ copy_DtoD_async()

◆ copy_DtoH()

◆ copy_DtoH_async()

◆ copy_HtoD()

◆ copy_HtoD_async()

◆ deallocate_device()

◆ deallocate_device_async()

◆ deallocate_host()

◆ reallocate_device()

◆ reallocate_host()

◆ wait_for_extra_info_ready()