KokkosBatched::Syr¶
Defined in header: KokkosBatched_Syr.hpp
template <typename ArgUplo, typename ArgTrans>
struct SerialSyr {
template <typename ScalarType, typename XViewType, typename AViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const XViewType &x, const AViewType &a);
};
Perform a symmetric rank-1 update of matrix \(A\) by vector \(x\) with scaling factor \(alpha\)
If
ArgTrans == KokkosBatched::Trans::Transpose
, this operation is equivalent to the BLAS routineSSYR
(CSYR
) orDSYR
(ZSYR
) for single or double precision for real (complex) matrix.If
ArgTrans == KokkosBatched::Trans::ConjTranspose
, this operation is equivalent to the BLAS routineCHER
orZHER
for single or double precision for complex matrix.
Parameters¶
- alpha:
Scaling factor of the rank-1 update
- x:
On input, \(x\) is a length n vector.
- A:
With
ArgUplo == KokkosBatched::Uplo::Upper
, the upper triangular part of \(A\) is referenced and updated. WithArgUplo == KokkosBatched::Uplo::Lower
, the lower triangular part of \(A\) is referenced and updated.
Type Requirements¶
ArgUplo
must be one of the following:KokkosBatched::Uplo::Upper
to update the upper triangular part of \(A\)KokkosBatched::Uplo::Lower
to update the lower triangular part of \(A\)
ArgTrans
must be one of the following:KokkosBatched::Trans::Transpose
to perform \(A = A + \alpha (x * x^T)\)KokkosBatched::Trans::ConjTranspose
to perform \(A = A + \alpha (x * x^H)\)
ScalarType
must be a built-in floating point type (float
,double
,Kokkos::complex<float>
,Kokkos::complex<double>
)XViewType
must be a Kokkos View of rank 1 containing a vector \(X\)AViewType
must be a Kokkos View of rank 2 containing a matrix \(A\) that satisfiesstd::is_same_v<typename AViewType::value_type, typename AViewType::non_const_value_type>
Example¶
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Syr.hpp>
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
/// \brief Example of batched syr
/// Perform a symmetric rank-1 update of matrix A by vector x, where
/// A: [[1, -3, -2, 0],
/// [0, 1, -3, -2],
/// [0, 0, 1, -3],
/// [0, 0, 0, 1]]
/// x: [1, 2, 3, 4]
/// alpha: 1.5
///
/// After, A = A + alpha * (x * x^T), it will give
/// A: [[ 2.5, 0., 2.5, 6.],
/// [ 0., 7., 6., 10.],
/// [ 0., 0., 14.5, 15.],
/// [ 0., 0., 0., 25.]]
///
int main(int /*argc*/, char** /*argv*/) {
Kokkos::initialize();
{
using View2DType = Kokkos::View<double**, ExecutionSpace>;
using View3DType = Kokkos::View<double***, ExecutionSpace>;
const int Nb = 10, n = 4;
// Matrix A
View3DType A("A", Nb, n, n), Ref("Ref", Nb, n, n);
// Vector x
View2DType x("x", Nb, n);
// Initialize A and x
Kokkos::deep_copy(x, 1.0);
auto h_A = Kokkos::create_mirror_view(A);
auto h_x = Kokkos::create_mirror_view(x);
auto h_Ref = Kokkos::create_mirror_view(Ref);
// Upper triangular matrix
for (int ib = 0; ib < Nb; ib++) {
// Fill vector x
for (int i = 0; i < n; i++) {
h_x(ib, i) = i + 1;
}
// Fill the matrix A
h_A(ib, 0, 0) = 1.0;
h_A(ib, 0, 1) = -3.0;
h_A(ib, 0, 2) = -2.0;
h_A(ib, 0, 3) = 0.0;
h_A(ib, 1, 0) = 0.0;
h_A(ib, 1, 1) = 1.0;
h_A(ib, 1, 2) = -3.0;
h_A(ib, 1, 3) = -2.0;
h_A(ib, 2, 0) = 0.0;
h_A(ib, 2, 1) = 0.0;
h_A(ib, 2, 2) = 1.0;
h_A(ib, 2, 3) = -3.0;
h_A(ib, 3, 0) = 0.0;
h_A(ib, 3, 1) = 0.0;
h_A(ib, 3, 2) = 0.0;
h_A(ib, 3, 3) = 1.0;
// Fill the reference matrix
h_Ref(ib, 0, 0) = 2.5;
h_Ref(ib, 0, 1) = 0.0;
h_Ref(ib, 0, 2) = 2.5;
h_Ref(ib, 0, 3) = 6.0;
h_Ref(ib, 1, 0) = 0.0;
h_Ref(ib, 1, 1) = 7.0;
h_Ref(ib, 1, 2) = 6.0;
h_Ref(ib, 1, 3) = 10.0;
h_Ref(ib, 2, 0) = 0.0;
h_Ref(ib, 2, 1) = 0.0;
h_Ref(ib, 2, 2) = 14.5;
h_Ref(ib, 2, 3) = 15.0;
h_Ref(ib, 3, 0) = 0.0;
h_Ref(ib, 3, 1) = 0.0;
h_Ref(ib, 3, 2) = 0.0;
h_Ref(ib, 3, 3) = 25.0;
}
Kokkos::deep_copy(A, h_A);
Kokkos::deep_copy(x, h_x);
// Compute A = A + alpha * (x * x^T)
const double alpha = 1.5;
ExecutionSpace exec;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
policy_type policy{exec, 0, Nb};
Kokkos::parallel_for(
"syr", policy, KOKKOS_LAMBDA(int ib) {
auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
// A = A + alpha * (x * x^T)
KokkosBatched::SerialSyr<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::Transpose>::invoke(alpha, sub_x,
sub_A);
});
// Confirm that the results are correct
Kokkos::deep_copy(h_A, A);
bool correct = true;
double eps = 1.0e-12;
for (int ib = 0; ib < Nb; ib++) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (Kokkos::abs(h_A(ib, i, j) - h_Ref(ib, i, j)) > eps) correct = false;
}
}
}
if (correct) {
std::cout << "syr works correctly!" << std::endl;
}
}
Kokkos::finalize();
}
output:
syr works correctly!