#include <CUDABLAS1.hpp>

Public Member Functions
template<>
void	scal (IndexType n, const float alpha, float *x_d, const IndexType incx, SyncToken *syncToken)
	scale
template<>
void	scal (IndexType n, const double alpha, double *x_d, const IndexType incx, SyncToken *syncToken)
template<>
float	nrm2 (IndexType n, const float *x_d, IndexType incx, SyncToken *syncToken)
	nrm2
template<>
double	nrm2 (IndexType n, const double *x_d, IndexType incx, SyncToken *syncToken)
template<>
float	asum (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken)
	asum
template<>
double	asum (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken)
template<>
IndexType	iamax (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken)
	iamax
template<>
IndexType	iamax (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken)
template<>
void	swap (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, SyncToken *syncToken)
	swap
template<>
void	swap (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, SyncToken *syncToken)
template<>
void	copy (IndexType n, const float *x_d, IndexType incx, float *y_d, IndexType incy, SyncToken *syncToken)
	copy
template<>
void	copy (IndexType n, const double *x_d, IndexType incx, double *y_d, IndexType incy, SyncToken *syncToken)
template<>
void	axpy (IndexType n, float alpha, const float *x_d, IndexType incx, float *y_d, const IndexType incy, SyncToken *syncToken)
	axpy
template<>
void	axpy (IndexType n, double alpha, const double *x_d, IndexType incx, double *y_d, const IndexType incy, SyncToken *syncToken)
template<>
float	dot (IndexType n, const float *x_d, IndexType incx, const float *y_d, IndexType incy, SyncToken *syncToken)
	dot
template<>
double	dot (IndexType n, const double *x_d, IndexType incx, const double *y_d, IndexType incy, SyncToken *syncToken)
template<>
void	rot (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float c, const float s, SyncToken *syncToken)
	rot
template<>
void	rot (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double c, const double s, SyncToken *syncToken)
template<>
void	rotm (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float *p_d, SyncToken *syncToken)
	rotm
template<>
void	rotm (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double *p_d, SyncToken *syncToken)
Static Public Member Functions
template<typename T >
static void	scal (const IndexType n, const T alpha, T *x, const IndexType incX, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::scal.
template<typename T >
static T	nrm2 (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::nrm2.
template<typename T >
static T	asum (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::asum.
template<typename T >
static IndexType	iamax (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::iamax.
template<typename T >
static void	swap (const IndexType n, T *y, const IndexType incY, T *x, const IndexType incX, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::swap.
template<typename T >
static void	copy (const IndexType n, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::copy.
template<typename T >
static void	axpy (const IndexType n, const T alpha, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::axpy.
template<typename T >
static T	dot (const IndexType n, const T *x, const IndexType incX, const T *y, const IndexType incY, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::dot.
template<typename T >
static void	sum (const IndexType n, T alpha, const T *x, T beta, const T *y, T *z, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::sum.
template<typename T >
static void	rot (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T c, const T s, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::rot.
template<typename T >
static void	rotm (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T *P, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::rotm.
template<typename T >
static void	ass (const IndexType n, const T value, T *x, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::ass.
template<typename T >
static T	viamax (const IndexType n, const T *x_d, const IndexType incx, SyncToken *syncToken)
	This function is the CUDA implementation of lama::BLAS1Interface::viamax.
Static Private Member Functions
template<typename T >
static void	ass_launcher (const int n, const T value, T *x, cudaStream_t stream)
template<typename T >
static void	sum_launcher (const int n, T alpha, const T *x, T beta, const T *y, T *z, cudaStream_t stream)

Member Function Documentation

template<typename T >

static void lama::CUDABLAS1::ass	(	const IndexType	n,
		const T	value,
		T *	x,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::ass.

rotg

rotmg

References ass_launcher(), lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::ass_launcher	(	const int	n,
		const T	value,
		T *	x,
		cudaStream_t	stream
	)		`[static, private]`

Referenced by ass().

template<typename T >

static T lama::CUDABLAS1::asum	(	const IndexType	n,
		const T *	x,
		const IndexType	incX,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::asum.

template<>

float lama::CUDABLAS1::asum	(	const IndexType	n,
		const float *	x_d,
		const IndexType	incX,
		SyncToken *	syncToken
	)

asum

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

double lama::CUDABLAS1::asum	(	const IndexType	n,
		const double *	x_d,
		const IndexType	incX,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::axpy	(	const IndexType	n,
		const T	alpha,
		const T *	x,
		const IndexType	incX,
		T *	y,
		const IndexType	incY,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::axpy.

template<>

void lama::CUDABLAS1::axpy	(	IndexType	n,
		float	alpha,
		const float *	x_d,
		IndexType	incx,
		float *	y_d,
		const IndexType	incy,
		SyncToken *	syncToken
	)

axpy

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::axpy	(	IndexType	n,
		double	alpha,
		const double *	x_d,
		IndexType	incx,
		double *	y_d,
		const IndexType	incy,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::copy	(	const IndexType	n,
		const T *	x,
		const IndexType	incX,
		T *	y,
		const IndexType	incY,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::copy.

template<>

void lama::CUDABLAS1::copy	(	IndexType	n,
		const float *	x_d,
		IndexType	incx,
		float *	y_d,
		IndexType	incy,
		SyncToken *	syncToken
	)

copy

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::copy	(	IndexType	n,
		const double *	x_d,
		IndexType	incx,
		double *	y_d,
		IndexType	incy,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static T lama::CUDABLAS1::dot	(	const IndexType	n,
		const T *	x,
		const IndexType	incX,
		const T *	y,
		const IndexType	incY,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::dot.

template<>

float lama::CUDABLAS1::dot	(	IndexType	n,
		const float *	x_d,
		IndexType	incx,
		const float *	y_d,
		IndexType	incy,
		SyncToken *	syncToken
	)

dot

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

double lama::CUDABLAS1::dot	(	IndexType	n,
		const double *	x_d,
		IndexType	incx,
		const double *	y_d,
		IndexType	incy,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static IndexType lama::CUDABLAS1::iamax	(	const IndexType	n,
		const T *	x,
		const IndexType	incX,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::iamax.

Referenced by iamax(), and viamax().

template<>

IndexType lama::CUDABLAS1::iamax	(	const IndexType	n,
		const float *	x_d,
		const IndexType	incX,
		SyncToken *	syncToken
	)

iamax

References lama::CUDAStreamSyncToken::getCUDAStream(), iamax(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

IndexType lama::CUDABLAS1::iamax	(	const IndexType	n,
		const double *	x_d,
		const IndexType	incX,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), iamax(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static T lama::CUDABLAS1::nrm2	(	const IndexType	n,
		const T *	x,
		const IndexType	incX,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::nrm2.

template<>

float lama::CUDABLAS1::nrm2	(	IndexType	n,
		const float *	x_d,
		IndexType	incx,
		SyncToken *	syncToken
	)

nrm2

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

double lama::CUDABLAS1::nrm2	(	IndexType	n,
		const double *	x_d,
		IndexType	incx,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::rot	(	const IndexType	n,
		T *	x,
		const IndexType	incX,
		T *	y,
		const IndexType	incY,
		const T	c,
		const T	s,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::rot.

template<>

void lama::CUDABLAS1::rot	(	const IndexType	n,
		float *	x_d,
		const IndexType	incX,
		float *	y_d,
		const IndexType	incY,
		const float	c,
		const float	s,
		SyncToken *	syncToken
	)

rot

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::rot	(	const IndexType	n,
		double *	x_d,
		const IndexType	incX,
		double *	y_d,
		const IndexType	incY,
		const double	c,
		const double	s,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::rotm	(	const IndexType	n,
		T *	x,
		const IndexType	incX,
		T *	y,
		const IndexType	incY,
		const T *	P,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::rotm.

template<>

void lama::CUDABLAS1::rotm	(	const IndexType	n,
		float *	x_d,
		const IndexType	incX,
		float *	y_d,
		const IndexType	incY,
		const float *	p_d,
		SyncToken *	syncToken
	)

rotm

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::rotm	(	const IndexType	n,
		double *	x_d,
		const IndexType	incX,
		double *	y_d,
		const IndexType	incY,
		const double *	p_d,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::scal	(	IndexType	n,
		const float	alpha,
		float *	x_d,
		const IndexType	incx,
		SyncToken *	syncToken
	)

scale

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::scal	(	const IndexType	n,
		const T	alpha,
		T *	x,
		const IndexType	incX,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::scal.

template<>

void lama::CUDABLAS1::scal	(	IndexType	n,
		const double	alpha,
		double *	x_d,
		const IndexType	incx,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static void lama::CUDABLAS1::sum	(	const IndexType	n,
		T	alpha,
		const T *	x,
		T	beta,
		const T *	y,
		T *	z,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::sum.

sum

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUDA_ACCESS, LAMA_CHECK_CUDA_ERROR, and sum_launcher().

template<typename T >

static void lama::CUDABLAS1::sum_launcher	(	const int	n,
		T	alpha,
		const T *	x,
		T	beta,
		const T *	y,
		T *	z,
		cudaStream_t	stream
	)		`[static, private]`

Referenced by sum().

template<typename T >

static void lama::CUDABLAS1::swap	(	const IndexType	n,
		T *	y,
		const IndexType	incY,
		T *	x,
		const IndexType	incX,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::swap.

Referenced by lama::CUDALAPACK::laswp().

template<>

void lama::CUDABLAS1::swap	(	const IndexType	n,
		float *	x_d,
		const IndexType	incX,
		float *	y_d,
		const IndexType	incY,
		SyncToken *	syncToken
	)

swap

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<>

void lama::CUDABLAS1::swap	(	const IndexType	n,
		double *	x_d,
		const IndexType	incX,
		double *	y_d,
		const IndexType	incY,
		SyncToken *	syncToken
	)

References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUBLAS_ERROR, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.

template<typename T >

static T lama::CUDABLAS1::viamax	(	const IndexType	n,
		const T *	x_d,
		const IndexType	incx,
		SyncToken *	syncToken
	)		`[static]`

This function is the CUDA implementation of lama::BLAS1Interface::viamax.

References iamax(), LAMA_CHECK_CUDA_ERROR, and lama::max().

The documentation for this class was generated from the following files:

/home/brandes/workspace/LAMA/src/lama/cuda/CUDABLAS1.hpp
/home/brandes/workspace/LAMA/src/lama/cuda/CUDABLAS1.cpp

Public Member Functions

Static Public Member Functions

Static Private Member Functions

Member Function Documentation