LAMA
|
#include <LAMAInterface.hpp>
Public Member Functions | |
BLAS1Interface () | |
Default constructor, initializes variables with NULL. | |
Data Fields | |
void(* | scal )(const IndexType n, const T alpha, T *x, const IndexType incX, SyncToken *syncToken) |
scal replaces vector x with alpha * x. | |
T(* | nrm2 )(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
nrm2 computes the Euclidean norm of the n-vector x (with storage increment incX). | |
T(* | asum )(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
asum computes the sum of the absolute values of the elements of vector x | |
IndexType(* | iamax )(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
iamax finds the smallest index of the maximum magnitude element of vector x | |
T(* | viamax )(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
viamax computes the max norm of vector x, i.e. the largest absolute value of its elements | |
void(* | swap )(const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
swap interchanges vector x with vector y. | |
void(* | copy )(const IndexType n, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
copy copies the vector x to the vector y. | |
void(* | axpy )(const IndexType n, T alpha, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
axpy multiplies vector x by scalar alpha and adds the result to vector y. | |
T(* | dot )(const IndexType n, const T *x, const IndexType incX, const T *y, const IndexType inc, SyncToken *syncToken) |
dot computes the dot product of two vectors. | |
void(* | sum )(const IndexType n, T alpha, const T *x, T beta, const T *y, T *z, SyncToken *syncToken) |
sum computes z = alpha * x + beta * y. TODO: need dotu ?! | |
void(* | rot )(const IndexType N, T *X, const IndexType incX, T *Y, const IndexType incY, const T c, const T s, SyncToken *syncToken) |
rot multiplies the 2×2 rotation matrix [ c s ; -s c ] with the 2×n matrix [ x^T ; y^T ] | |
void(* | rotm )(const IndexType N, T *X, const IndexType incX, T *Y, const IndexType incY, const T *P, SyncToken *syncToken) |
rotm applies a modified Givens rotation, described by the parameter array P, to the vectors x and y | |
void(* | rotg )(T *a, T *b, T *c, T *s, SyncToken *syncToken) |
rotg constructs a Givens rotation, computing c and s from the scalars a and b. The function cublasSrot(n, x, incX, y, incY, c, s) is normally called next to apply the transformation to a 2×n matrix | |
void(* | rotmg )(T *d1, T *d2, T *b1, const T b2, T *p, SyncToken *syncToken) |
Computes the parameters for a modified Givens rotation. | |
void(* | ass )(const IndexType n, const T value, T *x, SyncToken *syncToken) |
ass assigns one scalar value to every element of a vector of the given size. |
lama::BLAS1Interface< T >::BLAS1Interface | ( | ) |
Default constructor, initializes variables with NULL.
void(* lama::BLAS1Interface< T >::ass)(const IndexType n, const T value, T *x, SyncToken *syncToken) |
ass assigns one scalar value to every element of a vector of the given size.
This function is the OpenMP implementation of lama::BLAS1Interface::ass.
[in] | n | size of the vector |
[in] | value | scalar value that is assigned to every element of the vector |
[out] | x | vector to which the value is assigned |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
T(* lama::BLAS1Interface< T >::asum)(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
asum computes the sum of the absolute values of the elements of vector x
asum(x) = sum_{i = 1}^{n}( |x_i| )
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
Returns the sum of absolute values (zero if n <= 0 or incX <= 0).
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::axpy)(const IndexType n, T alpha, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
axpy multiplies vector x by scalar alpha and adds the result to vector y.
y = alpha * x + y
[in] | n | number of elements in input vectors. |
[in] | alpha | scalar multiplier |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
[out] | y | result (unchanged if n<=0) |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::copy)(const IndexType n, const T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
copy copies the vector x to the vector y.
y = x
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
[out] | y | contains vector x |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
T(* lama::BLAS1Interface< T >::dot)(const IndexType n, const T *x, const IndexType incX, const T *y, const IndexType inc, SyncToken *syncToken) |
dot computes the dot product of two vectors.
It returns the dot product of the vectors x and y if successful, and 0.0 otherwise.
dot = sum_{i = 1}^{n}( x_i * y_i )
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
Returns the dot product (zero if n <= 0).
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
IndexType(* lama::BLAS1Interface< T >::iamax)(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
iamax finds the smallest index of the maximum magnitude element of vector x
The magnitude of that element is the max norm: maxnorm(x) = max( |x_i| )
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
Returns the smallest index of the maximum magnitude element (zero if n <= 0 or incX <= 0).
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
T(* lama::BLAS1Interface< T >::nrm2)(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
nrm2 computes the Euclidean norm of the n-vector x (with storage increment incX).
nrm2(x) = sqrt( sum_{i = 1}^{n}( x_i^2 ) )
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
Returns the Euclidean norm (zero if n <= 0 or incX <= 0).
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::rot)(const IndexType N, T *X, const IndexType incX, T *Y, const IndexType incY, const T c, const T s, SyncToken *syncToken) |
rot multiplies the 2×2 rotation matrix [ c s ; -s c ] with the 2×n matrix [ x^T ; y^T ]
[in] | n | number of elements in input vectors. |
[in,out] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in,out] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
[in] | c | element of rotation matrix |
[in] | s | element of rotation matrix |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::rotg)(T *a, T *b, T *c, T *s, SyncToken *syncToken) |
rotg constructs a Givens rotation, computing c and s from the scalars a and b. The function cublasSrot(n, x, incX, y, incY, c, s) is normally called next to apply the transformation to a 2×n matrix
[in,out] | a | scalar |
[in,out] | b | scalar |
[in,out] | c | result |
[in,out] | s | result |
Referenced by lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::rotm)(const IndexType N, T *X, const IndexType incX, T *Y, const IndexType incY, const T *P, SyncToken *syncToken) |
rotm applies a modified Givens rotation, described by the parameter array P, to the vectors x and y
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
[in] | *P | ? //in cublas sparam |
[out] | x | rotated vector x(unchanged if n<=0) |
[out] | y | rotated vector y(unchanged if n<=0) |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::rotmg)(T *d1, T *d2, T *b1, const T b2, T *p, SyncToken *syncToken) |
Computes the parameters for a modified Givens rotation.
[in] | d1 | scaling factor for the x-coordinate of the input vector |
[in] | d2 | scaling factor for the y-coordinate of the input vector |
[in] | b1 | scalar |
[in] | b2 | scalar |
[out] | d1 | changed to represent the effect of the transformation |
[out] | d2 | changed to represent the effect of the transformation |
[out] | b1 | changed to represent the effect of the transformation |
[out] | P | ? //in cublas sparam |
Referenced by lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::scal)(const IndexType n, const T alpha, T *x, const IndexType incX, SyncToken *syncToken) |
scal replaces vector x with alpha * x.
x = alpha * x
[in] | n | number of elements in input vectors. |
[in] | alpha | scalar multiplier |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[out] | x | vector x(unchanged if n<=0 or incX <=0) |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::sum)(const IndexType n, T alpha, const T *x, T beta, const T *y, T *z, SyncToken *syncToken) |
sum computes a linear combination of the vectors x and y and stores it in z. TODO: need dotu ?!
sum: z = alpha * x + beta * y
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
void(* lama::BLAS1Interface< T >::swap)(const IndexType n, T *x, const IndexType incX, T *y, const IndexType incY, SyncToken *syncToken) |
swap interchanges vector x with vector y.
x <-> y
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
[in] | y | vector with n elements |
[in] | incY | storage spacing between elements of y |
[out] | x | vector x(unchanged if n<=0) |
[out] | y | vector y(unchanged if n<=0) |
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().
T(* lama::BLAS1Interface< T >::viamax)(const IndexType n, const T *x, const IndexType incX, SyncToken *syncToken) |
viamax computes the max norm of vector x, i.e. the largest absolute value of its elements: maxnorm(x) = max( |x_i| )
[in] | n | number of elements in input vectors. |
[in] | x | vector with n elements |
[in] | incX | storage spacing between elements of x |
Returns the maximum absolute value of the elements of x (zero if n <= 0 or incX <= 0).
Referenced by lama::CUDAInterface::CUDAInterface(), and lama::OpenMPInterface::OpenMPInterface().