LAMA
|
#include <CUDABLAS1.hpp>
Public Member Functions | |
template<> | |
void | scal (IndexType n, const float alpha, float *x_d, const IndexType incx, SyncToken *syncToken) |
scale | |
template<> | |
void | scal (IndexType n, const double alpha, double *x_d, const IndexType incx, SyncToken *syncToken) |
template<> | |
float | nrm2 (IndexType n, const float *x_d, IndexType incx, SyncToken *syncToken) |
nrm2 | |
template<> | |
double | nrm2 (IndexType n, const double *x_d, IndexType incx, SyncToken *syncToken) |
template<> | |
float | asum (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken) |
asum | |
template<> | |
double | asum (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken) |
template<> | |
IndexType | iamax (const IndexType n, const float *x_d, const IndexType incX, SyncToken *syncToken) |
iamax | |
template<> | |
IndexType | iamax (const IndexType n, const double *x_d, const IndexType incX, SyncToken *syncToken) |
template<> | |
void | swap (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, SyncToken *syncToken) |
swap | |
template<> | |
void | swap (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, SyncToken *syncToken) |
template<> | |
void | copy (IndexType n, const float *x_d, IndexType incx, float *y_d, IndexType incy, SyncToken *syncToken) |
copy | |
template<> | |
void | copy (IndexType n, const double *x_d, IndexType incx, double *y_d, IndexType incy, SyncToken *syncToken) |
template<> | |
void | axpy (IndexType n, float alpha, const float *x_d, IndexType incx, float *y_d, const IndexType incy, SyncToken *syncToken) |
axpy | |
template<> | |
void | axpy (IndexType n, double alpha, const double *x_d, IndexType incx, double *y_d, const IndexType incy, SyncToken *syncToken) |
template<> | |
float | dot (IndexType n, const float *x_d, IndexType incx, const float *y_d, IndexType incy, SyncToken *syncToken) |
dot | |
template<> | |
double | dot (IndexType n, const double *x_d, IndexType incx, const double *y_d, IndexType incy, SyncToken *syncToken) |
template<> | |
void | rot (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float c, const float s, SyncToken *syncToken) |
rot | |
template<> | |
void | rot (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double c, const double s, SyncToken *syncToken) |
template<> | |
void | rotm (const IndexType n, float *x_d, const IndexType incX, float *y_d, const IndexType incY, const float *p_d, SyncToken *syncToken) |
rotm | |
template<> | |
void | rotm (const IndexType n, double *x_d, const IndexType incX, double *y_d, const IndexType incY, const double *p_d, SyncToken *syncToken) |
Static Public Member Functions | |
template<typename T > | |
static void | scal (const IndexType n, const T alpha, T *x, const IndexType incX, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::scal. | |
template<typename T > | |
static T | nrm2 (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::nrm2. | |
template<typename T > | |
static T | asum (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::asum. | |
template<typename T > | |
static IndexType | iamax (const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::iamax. | |
template<typename T > | |
static void | swap (const IndexType n, T *y, const IndexType incY, T *x, const IndexType incX, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::swap. | |
template<typename T > | |
static void | copy (const IndexType n, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::copy. | |
template<typename T > | |
static void | axpy (const IndexType n, const T alpha, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::axpy. | |
template<typename T > | |
static T | dot (const IndexType n, const T *x, const IndexType incX, const T *y, const IndexType incY, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::dot. | |
template<typename T > | |
static void | sum (const IndexType n, T alpha, const T *x, T beta, const T *y, T *z, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::sum. | |
template<typename T > | |
static void | rot (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T c, const T s, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::rot. | |
template<typename T > | |
static void | rotm (const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, const T *P, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::rotm. | |
template<typename T > | |
static void | ass (const IndexType n, const T value, T *x, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::ass. | |
template<typename T > | |
static T | viamax (const IndexType n, const T *x_d, const IndexType incx, SyncToken *syncToken) |
This function is the CUDA implementation of lama::BLAS1Interface::viamax. | |
Static Private Member Functions | |
template<typename T > | |
static void | ass_launcher (const int n, const T value, T *x, cudaStream_t stream) |
template<typename T > | |
static void | sum_launcher (const int n, T alpha, const T *x, T beta, const T *y, T *z, cudaStream_t stream) |
static void lama::CUDABLAS1::ass | ( | const IndexType | n,
const T | value, | ||
T * | x, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::ass.
rotg
rotmg
References ass_launcher(), lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUDA_ACCESS, and LAMA_CHECK_CUDA_ERROR.
static void lama::CUDABLAS1::ass_launcher | ( | const int | n, |
const T | value, | ||
T * | x, | ||
cudaStream_t | stream | ||
) | [static, private] |
Referenced by ass().
static T lama::CUDABLAS1::asum | ( | const IndexType | n, |
const T * | x, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::asum.
float lama::CUDABLAS1::asum | ( | const IndexType | n, |
const float * | x_d, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) |
double lama::CUDABLAS1::asum | ( | const IndexType | n, |
const double * | x_d, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::axpy | ( | const IndexType | n, |
const T | alpha, | ||
const T * | x, | ||
const IndexType | incX, | ||
T * | y, | ||
const IndexType | incY, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::axpy.
void lama::CUDABLAS1::axpy | ( | IndexType | n, |
float | alpha, | ||
const float * | x_d, | ||
IndexType | incx, | ||
float * | y_d, | ||
const IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::axpy | ( | IndexType | n, |
double | alpha, | ||
const double * | x_d, | ||
IndexType | incx, | ||
double * | y_d, | ||
const IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::copy | ( | const IndexType | n, |
const T * | x, | ||
const IndexType | incX, | ||
T * | y, | ||
const IndexType | incY, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::copy.
void lama::CUDABLAS1::copy | ( | IndexType | n, |
const float * | x_d, | ||
IndexType | incx, | ||
float * | y_d, | ||
IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::copy | ( | IndexType | n, |
const double * | x_d, | ||
IndexType | incx, | ||
double * | y_d, | ||
IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
static T lama::CUDABLAS1::dot | ( | const IndexType | n, |
const T * | x, | ||
const IndexType | incX, | ||
const T * | y, | ||
const IndexType | incY, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::dot.
float lama::CUDABLAS1::dot | ( | IndexType | n, |
const float * | x_d, | ||
IndexType | incx, | ||
const float * | y_d, | ||
IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
double lama::CUDABLAS1::dot | ( | IndexType | n, |
const double * | x_d, | ||
IndexType | incx, | ||
const double * | y_d, | ||
IndexType | incy, | ||
SyncToken * | syncToken | ||
) |
static IndexType lama::CUDABLAS1::iamax | ( | const IndexType | n, |
const T * | x, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::iamax.
IndexType lama::CUDABLAS1::iamax | ( | const IndexType | n, |
const float * | x_d, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) |
IndexType lama::CUDABLAS1::iamax | ( | const IndexType | n, |
const double * | x_d, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) |
static T lama::CUDABLAS1::nrm2 | ( | const IndexType | n, |
const T * | x, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::nrm2.
float lama::CUDABLAS1::nrm2 | ( | IndexType | n, |
const float * | x_d, | ||
IndexType | incx, | ||
SyncToken * | syncToken | ||
) |
double lama::CUDABLAS1::nrm2 | ( | IndexType | n, |
const double * | x_d, | ||
IndexType | incx, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::rot | ( | const IndexType | n, |
T * | x, | ||
const IndexType | incX, | ||
T * | y, | ||
const IndexType | incY, | ||
const T | c, | ||
const T | s, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::rot.
void lama::CUDABLAS1::rot | ( | const IndexType | n, |
float * | x_d, | ||
const IndexType | incX, | ||
float * | y_d, | ||
const IndexType | incY, | ||
const float | c, | ||
const float | s, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::rot | ( | const IndexType | n, |
double * | x_d, | ||
const IndexType | incX, | ||
double * | y_d, | ||
const IndexType | incY, | ||
const double | c, | ||
const double | s, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::rotm | ( | const IndexType | n, |
T * | x, | ||
const IndexType | incX, | ||
T * | y, | ||
const IndexType | incY, | ||
const T * | P, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::rotm.
void lama::CUDABLAS1::rotm | ( | const IndexType | n, |
float * | x_d, | ||
const IndexType | incX, | ||
float * | y_d, | ||
const IndexType | incY, | ||
const float * | p_d, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::rotm | ( | const IndexType | n, |
double * | x_d, | ||
const IndexType | incX, | ||
double * | y_d, | ||
const IndexType | incY, | ||
const double * | p_d, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::scal | ( | IndexType | n, |
const float | alpha, | ||
float * | x_d, | ||
const IndexType | incx, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::scal | ( | const IndexType | n, |
const T | alpha, | ||
T * | x, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::scal.
void lama::CUDABLAS1::scal | ( | IndexType | n, |
const double | alpha, | ||
double * | x_d, | ||
const IndexType | incx, | ||
SyncToken * | syncToken | ||
) |
static void lama::CUDABLAS1::sum | ( | const IndexType | n,
T | alpha, | ||
const T * | x, | ||
T | beta, | ||
const T * | y, | ||
T * | z, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::sum.
sum
References lama::CUDAStreamSyncToken::getCUDAStream(), LAMA_ASSERT_DEBUG, LAMA_CHECK_CUDA_ACCESS, LAMA_CHECK_CUDA_ERROR, and sum_launcher().
static void lama::CUDABLAS1::sum_launcher | ( | const int | n, |
T | alpha, | ||
const T * | x, | ||
T | beta, | ||
const T * | y, | ||
T * | z, | ||
cudaStream_t | stream | ||
) | [static, private] |
Referenced by sum().
static void lama::CUDABLAS1::swap | ( | const IndexType | n, |
T * | y, | ||
const IndexType | incY, | ||
T * | x, | ||
const IndexType | incX, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::swap.
Referenced by lama::CUDALAPACK::laswp().
void lama::CUDABLAS1::swap | ( | const IndexType | n, |
float * | x_d, | ||
const IndexType | incX, | ||
float * | y_d, | ||
const IndexType | incY, | ||
SyncToken * | syncToken | ||
) |
void lama::CUDABLAS1::swap | ( | const IndexType | n, |
double * | x_d, | ||
const IndexType | incX, | ||
double * | y_d, | ||
const IndexType | incY, | ||
SyncToken * | syncToken | ||
) |
static T lama::CUDABLAS1::viamax | ( | const IndexType | n,
const T * | x_d, | ||
const IndexType | incx, | ||
SyncToken * | syncToken | ||
) | [static] |
This function is the CUDA implementation of lama::BLAS1Interface::viamax.
References iamax(), LAMA_CHECK_CUDA_ERROR, and lama::max().