KokkosBatched::Syr2¶
Defined in header: KokkosBatched_Syr2.hpp
template <typename ArgUplo, typename ArgTrans>
struct SerialSyr2 {
template <typename ScalarType, typename XViewType, typename YViewType, typename AViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const XViewType &x, const YViewType &y,
const AViewType &a);
};
Perform a symmetric rank-2 update of matrix \(A\) by vectors \(x\) and \(y\) with scaling factor \(alpha\)
If
ArgTrans == KokkosBatched::Trans::Transpose, this operation is equivalent to the BLAS routineSSYR2(CSYR2) orDSYR2(ZSYR2) for single or double precision for real (complex) matrix.If
ArgTrans == KokkosBatched::Trans::ConjTranspose, this operation is equivalent to the BLAS routineCHER2orZHER2for single or double precision for complex matrix.
Parameters¶
- alpha:
Scaling factor of the rank-2 update
- x:
On input, \(x\) is a length n vector.
- y:
On input, \(y\) is a length n vector.
- A:
With
ArgUplo == KokkosBatched::Uplo::Upper, the upper triangular part of \(A\) is referenced and updated. WithArgUplo == KokkosBatched::Uplo::Lower, the lower triangular part of \(A\) is referenced and updated.
Type Requirements¶
ArgUplomust be one of the following:KokkosBatched::Uplo::Upperto update the upper triangular part of \(A\)KokkosBatched::Uplo::Lowerto update the lower triangular part of \(A\)
ArgTransmust be one of the following:KokkosBatched::Trans::Transposeto perform \(A = A + \alpha (x \cdot y^T) + \alpha (y \cdot x^T)\)KokkosBatched::Trans::ConjTransposeto perform \(A = A + \alpha (x \cdot y^H) + \bar{\alpha} (y \cdot x^H)\)
ScalarTypemust be a built-in floating point type (float,double,Kokkos::complex<float>,Kokkos::complex<double>)XViewTypemust be a Kokkos View of rank 1 containing a vector \(X\)YViewTypemust be a Kokkos View of rank 1 containing a vector \(Y\)AViewTypemust be a Kokkos View of rank 2 containing a matrix \(A\) that satisfiesstd::is_same_v<typename AViewType::value_type, typename AViewType::non_const_value_type>
Example¶
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Syr2.hpp>
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
/// \brief Example of batched syr2
/// Perform a symmetric rank-2 update of matrix A by vectors x and y, where
/// A: [[1, -3, -2, 0],
/// [0, 1, -3, -2],
/// [0, 0, 1, -3],
/// [0, 0, 0, 1]]
/// x: [1, 2, 3, 4]
/// y: [4, 3, 2, 1]
/// alpha: 1.5
///
/// After, A = A + alpha * (x * y^T) + alpha * (y * x^T), it will give
/// A: [[13., 13.5, 19., 25.5],
/// [ 0., 19., 16.5, 19. ],
/// [ 0., 0., 19., 13.5 ],
/// [ 0., 0., 0., 13. ]]
///
int main(int /*argc*/, char** /*argv*/) {
Kokkos::initialize();
{
using View2DType = Kokkos::View<double**, ExecutionSpace>;
using View3DType = Kokkos::View<double***, ExecutionSpace>;
const int Nb = 10, n = 4;
// Matrix A
View3DType A("A", Nb, n, n), Ref("Ref", Nb, n, n);
// Vectors x and y
View2DType x("x", Nb, n), y("y", Nb, n);
// Initialize A, x, and y
auto h_A = Kokkos::create_mirror_view(A);
auto h_x = Kokkos::create_mirror_view(x);
auto h_y = Kokkos::create_mirror_view(y);
auto h_Ref = Kokkos::create_mirror_view(Ref);
// Upper triangular matrix
for (int ib = 0; ib < Nb; ib++) {
// Fill vectors x and y
for (int i = 0; i < n; i++) {
h_x(ib, i) = i + 1;
h_y(ib, i) = n - i;
}
// Fill the matrix A (upper triangular)
h_A(ib, 0, 0) = 1.0;
h_A(ib, 0, 1) = -3.0;
h_A(ib, 0, 2) = -2.0;
h_A(ib, 0, 3) = 0.0;
h_A(ib, 1, 0) = 0.0;
h_A(ib, 1, 1) = 1.0;
h_A(ib, 1, 2) = -3.0;
h_A(ib, 1, 3) = -2.0;
h_A(ib, 2, 0) = 0.0;
h_A(ib, 2, 1) = 0.0;
h_A(ib, 2, 2) = 1.0;
h_A(ib, 2, 3) = -3.0;
h_A(ib, 3, 0) = 0.0;
h_A(ib, 3, 1) = 0.0;
h_A(ib, 3, 2) = 0.0;
h_A(ib, 3, 3) = 1.0;
// Fill the reference matrix
h_Ref(ib, 0, 0) = 13.0;
h_Ref(ib, 0, 1) = 13.5;
h_Ref(ib, 0, 2) = 19.0;
h_Ref(ib, 0, 3) = 25.5;
h_Ref(ib, 1, 0) = 0.0;
h_Ref(ib, 1, 1) = 19.0;
h_Ref(ib, 1, 2) = 16.5;
h_Ref(ib, 1, 3) = 19.0;
h_Ref(ib, 2, 0) = 0.0;
h_Ref(ib, 2, 1) = 0.0;
h_Ref(ib, 2, 2) = 19.0;
h_Ref(ib, 2, 3) = 13.5;
h_Ref(ib, 3, 0) = 0.0;
h_Ref(ib, 3, 1) = 0.0;
h_Ref(ib, 3, 2) = 0.0;
h_Ref(ib, 3, 3) = 13.0;
}
Kokkos::deep_copy(A, h_A);
Kokkos::deep_copy(x, h_x);
Kokkos::deep_copy(y, h_y);
// Compute A = A + alpha * (x * y^T) + alpha * (y * x^T)
const double alpha = 1.5;
ExecutionSpace exec;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
policy_type policy{exec, 0, Nb};
Kokkos::parallel_for(
"syr2", policy, KOKKOS_LAMBDA(int ib) {
auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
auto sub_y = Kokkos::subview(y, ib, Kokkos::ALL);
// A = A + alpha * (x * y^T) + alpha * (y * x^T)
KokkosBatched::SerialSyr2<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::Transpose>::invoke(alpha, sub_x,
sub_y, sub_A);
});
// Confirm that the results are correct
Kokkos::deep_copy(h_A, A);
bool correct = true;
double eps = 1.0e-12;
for (int ib = 0; ib < Nb; ib++) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
if (Kokkos::abs(h_A(ib, i, j) - h_Ref(ib, i, j)) > eps) correct = false;
}
}
}
if (correct) {
std::cout << "syr2 works correctly!" << std::endl;
}
}
Kokkos::finalize();
}
output:
syr2 works correctly!