LAMA
lama::CUDABLAS1 Class Reference

#include <CUDABLAS1.hpp>

Public Member Functions

template<>
void scal (IndexType n, const float alpha, float *x_d, const IndexType incx, SyncToken *syncToken)
 scale
template<>
void scal (IndexType n, const double alpha, double *x_d, const IndexType incx, SyncToken *syncToken)
template<>
float nrm2 (IndexType n, const float *x_d, IndexType incx, SyncToken *syncToken)
 nrm2
template<>
double nrm2 (IndexType n, const double *x_d, IndexType incx, SyncToken *syncToken)
template<>
float asum (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken)
 asum
template<>
double asum (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken)
template<>
IndexType iamax (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken)
 iamax
template<>
IndexType iamax (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken)
template<>
void swap (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, SyncToken *syncToken)
 swap
template<>
void swap (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, SyncToken *syncToken)
template<>
void copy (IndexType n, const float *x_d, IndexType incx, float *y_d, IndexType incy, SyncToken *syncToken)
 copy
template<>
void copy (IndexType n, const double *x_d, IndexType incx, double *y_d, IndexType incy, SyncToken *syncToken)
template<>
void axpy (IndexType n, float alpha, const float *x_d, IndexType incx, float *y_d, const IndexType incy, SyncToken *syncToken)
 axpy
template<>
void axpy (IndexType n, double alpha, const double *x_d, IndexType incx, double *y_d, const IndexType incy, SyncToken *syncToken)
template<>
float dot (IndexType n, const float *x_d, IndexType incx, const float *y_d, IndexType incy, SyncToken *syncToken)
 dot
template<>
double dot (IndexType n, const double *x_d, IndexType incx, const double *y_d, IndexType incy, SyncToken *syncToken)
template<>
void rot (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float c, const float s, SyncToken *syncToken)
 rot
template<>
void rot (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double c, const double s, SyncToken *syncToken)
template<>
void rotm (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float *p_d, SyncToken *syncToken)
 rotm
template<>
void rotm (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double *p_d, SyncToken *syncToken)

Static Public Member Functions

template<typename T >
static void scal (const IndexType n, const T alpha, T *x, const IndexType incX, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::scal.
template<typename T >
static T nrm2 (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::nrm2.
template<typename T >
static T asum (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::asum.
template<typename T >
static IndexType iamax (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::iamax.
template<typename T >
static void swap (const IndexType n, T *y, const IndexType incY, T *x, const IndexType incX, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::swap.
template<typename T >
static void copy (const IndexType n, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::copy.
template<typename T >
static void axpy (const IndexType n, const T alpha, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::axpy.
template<typename T >
static T dot (const IndexType n, const T *x, const IndexType incX, const T *y, const IndexType incY, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::dot.
template<typename T >
static void sum (const IndexType n, T alpha, const T *x, T beta, const T *y, T *z, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::sum.
template<typename T >
static void rot (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T c, const T s, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::rot.
template<typename T >
static void rotm (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T *P, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::rotm.
template<typename T >
static void ass (const IndexType n, const T value, T *x, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::ass.
template<typename T >
static T viamax (const IndexType n, const T *x_d, const IndexType incx, SyncToken *syncToken)
 This function is the CUDA implementation of lama::BLAS1Interface::viamax.

Static Private Member Functions

template<typename T >
static void ass_launcher (const int n, const T value, T *x, cudaStream_t stream)
template<typename T >
static void sum_launcher (const int n, T alpha, const T *x, T beta, const T *y, T *z, cudaStream_t stream)

Member Function Documentation

template<typename T >
static void lama::CUDABLAS1::ass ( const IndexType  n,
const T  value,
T *  x,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::ass.

template<typename T >
static void lama::CUDABLAS1::ass_launcher ( const int  n,
const T  value,
T *  x,
cudaStream_t  stream 
) [static, private]

Referenced by ass().

template<typename T >
static T lama::CUDABLAS1::asum ( const IndexType  n,
const T *  x,
const IndexType  incX,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::asum.

template<>
float lama::CUDABLAS1::asum ( const IndexType  n,
const float *  x_d,
const IndexType  incX,
SyncToken *  syncToken 
)
template<typename T >
static void lama::CUDABLAS1::axpy ( const IndexType  n,
const T  alpha,
const T *  x,
const IndexType  incX,
T *  y,
const IndexType  incY,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::axpy.

template<>
void lama::CUDABLAS1::axpy ( IndexType  n,
float  alpha,
const float *  x_d,
IndexType  incx,
float *  y_d,
const IndexType  incy,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::axpy ( IndexType  n,
double  alpha,
const double *  x_d,
IndexType  incx,
double *  y_d,
const IndexType  incy,
SyncToken *  syncToken 
)
template<typename T >
static void lama::CUDABLAS1::copy ( const IndexType  n,
const T *  x,
const IndexType  incX,
T *  y,
const IndexType  incY,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::copy.

template<>
void lama::CUDABLAS1::copy ( IndexType  n,
const float *  x_d,
IndexType  incx,
float *  y_d,
IndexType  incy,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::copy ( IndexType  n,
const double *  x_d,
IndexType  incx,
double *  y_d,
IndexType  incy,
SyncToken *  syncToken 
)
template<typename T >
static T lama::CUDABLAS1::dot ( const IndexType  n,
const T *  x,
const IndexType  incX,
const T *  y,
const IndexType  incY,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::dot.

template<>
float lama::CUDABLAS1::dot ( IndexType  n,
const float *  x_d,
IndexType  incx,
const float *  y_d,
IndexType  incy,
SyncToken *  syncToken 
)
template<>
double lama::CUDABLAS1::dot ( IndexType  n,
const double *  x_d,
IndexType  incx,
const double *  y_d,
IndexType  incy,
SyncToken *  syncToken 
)
template<typename T >
static IndexType lama::CUDABLAS1::iamax ( const IndexType  n,
const T *  x,
const IndexType  incX,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::iamax.

Referenced by iamax(), and viamax().

template<typename T >
static T lama::CUDABLAS1::nrm2 ( const IndexType  n,
const T *  x,
const IndexType  incX,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::nrm2.

template<typename T >
static void lama::CUDABLAS1::rot ( const IndexType  n,
T *  x,
const IndexType  incX,
T *  y,
const IndexType  incY,
const T  c,
const T  s,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::rot.

template<>
void lama::CUDABLAS1::rot ( const IndexType  n,
float *  x_d,
const IndexType  incX,
float *  y_d,
const IndexType  incY,
const float  c,
const float  s,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::rot ( const IndexType  n,
double *  x_d,
const IndexType  incX,
double *  y_d,
const IndexType  incY,
const double  c,
const double  s,
SyncToken *  syncToken 
)
template<typename T >
static void lama::CUDABLAS1::rotm ( const IndexType  n,
T *  x,
const IndexType  incX,
T *  y,
const IndexType  incY,
const T *  P,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::rotm.

template<>
void lama::CUDABLAS1::rotm ( const IndexType  n,
float *  x_d,
const IndexType  incX,
float *  y_d,
const IndexType  incY,
const float *  p_d,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::rotm ( const IndexType  n,
double *  x_d,
const IndexType  incX,
double *  y_d,
const IndexType  incY,
const double *  p_d,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::scal ( IndexType  n,
const float  alpha,
float *  x_d,
const IndexType  incx,
SyncToken *  syncToken 
)
template<typename T >
static void lama::CUDABLAS1::scal ( const IndexType  n,
const T  alpha,
T *  x,
const IndexType  incX,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::scal.

template<>
void lama::CUDABLAS1::scal ( IndexType  n,
const double  alpha,
double *  x_d,
const IndexType  incx,
SyncToken *  syncToken 
)
template<typename T >
static void lama::CUDABLAS1::sum ( const IndexType  n,
T  alpha,
const T *  x,
T  beta,
const T *  y,
T *  z,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::sum.

template<typename T >
static void lama::CUDABLAS1::sum_launcher ( const int  n,
T  alpha,
const T *  x,
T  beta,
const T *  y,
T *  z,
cudaStream_t  stream 
) [static, private]

Referenced by sum().

template<typename T >
static void lama::CUDABLAS1::swap ( const IndexType  n,
T *  y,
const IndexType  incY,
T *  x,
const IndexType  incX,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::swap.

Referenced by lama::CUDALAPACK::laswp().

template<>
void lama::CUDABLAS1::swap ( const IndexType  n,
float *  x_d,
const IndexType  incX,
float *  y_d,
const IndexType  incY,
SyncToken *  syncToken 
)
template<>
void lama::CUDABLAS1::swap ( const IndexType  n,
double *  x_d,
const IndexType  incX,
double *  y_d,
const IndexType  incY,
SyncToken *  syncToken 
)
template<typename T >
static T lama::CUDABLAS1::viamax ( const IndexType  n,
const T *  x_d,
const IndexType  incx,
SyncToken *  syncToken 
) [static]

This function is the CUDA implementation of lama::BLAS1Interface::viamax.

References iamax(), LAMA_CHECK_CUDA_ERROR, and lama::max().


The documentation for this class was generated from the following files: