KokkosBatched::Syr2¶

Defined in header: KokkosBatched_Syr2.hpp

template <typename ArgUplo, typename ArgTrans>
struct SerialSyr2 {
  template <typename ScalarType, typename XViewType, typename YViewType, typename AViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const XViewType &x, const YViewType &y,
                                           const AViewType &a);
};

Perform a symmetric rank-2 update of matrix \(A\) by vectors \(x\) and \(y\) with scaling factor \(alpha\)

\[\begin{split}\begin{align} A &= A + \alpha (x \cdot y^T) + \alpha (y \cdot x^T) \: \text{(if ArgTrans == KokkosBatched::Trans::Transpose)} \\ A &= A + \alpha (x \cdot y^H) + \bar{\alpha} (y \cdot x^H) \: \text{(if ArgTrans == KokkosBatched::Trans::ConjTranspose)} \end{align}\end{split}\]

If ArgTrans == KokkosBatched::Trans::Transpose, this operation is equivalent to the BLAS routine SSYR2 (CSYR2) or DSYR2 (ZSYR2) for single or double precision for real (complex) matrix.
If ArgTrans == KokkosBatched::Trans::ConjTranspose, this operation is equivalent to the BLAS routine CHER2 or ZHER2 for single or double precision for complex matrix.

Parameters¶

alpha:: Scaling factor of the rank-2 update
x:: On input, \(x\) is a length n vector.
y:: On input, \(y\) is a length n vector.
A:: With ArgUplo == KokkosBatched::Uplo::Upper, the upper triangular part of \(A\) is referenced and updated. With ArgUplo == KokkosBatched::Uplo::Lower, the lower triangular part of \(A\) is referenced and updated.

Type Requirements¶

ArgUplo must be one of the following:
- KokkosBatched::Uplo::Upper to update the upper triangular part of \(A\)
- KokkosBatched::Uplo::Lower to update the lower triangular part of \(A\)
ArgTrans must be one of the following:
- KokkosBatched::Trans::Transpose to perform \(A = A + \alpha (x \cdot y^T) + \alpha (y \cdot x^T)\)
- KokkosBatched::Trans::ConjTranspose to perform \(A = A + \alpha (x \cdot y^H) + \bar{\alpha} (y \cdot x^H)\)
ScalarType must be a built-in floating point type (float, double, Kokkos::complex<float>, Kokkos::complex<double>)
XViewType must be a Kokkos View of rank 1 containing a vector \(X\)
YViewType must be a Kokkos View of rank 1 containing a vector \(Y\)
AViewType must be a Kokkos View of rank 2 containing a matrix \(A\) that satisfies std::is_same_v<typename AViewType::value_type, typename AViewType::non_const_value_type>

Example¶

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Syr2.hpp>

using ExecutionSpace = Kokkos::DefaultExecutionSpace;

/// \brief Example of batched syr2
/// Perform a symmetric rank-2 update of matrix A by vectors x and y, where
///   A: [[1, -3, -2,  0],
///       [0,  1, -3, -2],
///       [0,  0,  1, -3],
///       [0,  0,  0,  1]]
///   x: [1, 2, 3, 4]
///   y: [4, 3, 2, 1]
///   alpha: 1.5
///
/// After, A = A + alpha * (x * y^T) + alpha * (y * x^T), it will give
///   A: [[13.,  13.5,  19.,  25.5],
///       [ 0.,  19.,  16.5,  19. ],
///       [ 0.,   0.,  19.,  13.5 ],
///       [ 0.,   0.,   0.,  13.  ]]
///
int main(int /*argc*/, char** /*argv*/) {
  Kokkos::initialize();
  {
    using View2DType = Kokkos::View<double**, ExecutionSpace>;
    using View3DType = Kokkos::View<double***, ExecutionSpace>;
    const int Nb = 10, n = 4;

    // Matrix A
    View3DType A("A", Nb, n, n), Ref("Ref", Nb, n, n);

    // Vectors x and y
    View2DType x("x", Nb, n), y("y", Nb, n);

    // Initialize A, x, and y
    auto h_A   = Kokkos::create_mirror_view(A);
    auto h_x   = Kokkos::create_mirror_view(x);
    auto h_y   = Kokkos::create_mirror_view(y);
    auto h_Ref = Kokkos::create_mirror_view(Ref);

    // Upper triangular matrix
    for (int ib = 0; ib < Nb; ib++) {
      // Fill vectors x and y
      for (int i = 0; i < n; i++) {
        h_x(ib, i) = i + 1;
        h_y(ib, i) = n - i;
      }

      // Fill the matrix A (upper triangular)
      h_A(ib, 0, 0) = 1.0;
      h_A(ib, 0, 1) = -3.0;
      h_A(ib, 0, 2) = -2.0;
      h_A(ib, 0, 3) = 0.0;

      h_A(ib, 1, 0) = 0.0;
      h_A(ib, 1, 1) = 1.0;
      h_A(ib, 1, 2) = -3.0;
      h_A(ib, 1, 3) = -2.0;

      h_A(ib, 2, 0) = 0.0;
      h_A(ib, 2, 1) = 0.0;
      h_A(ib, 2, 2) = 1.0;
      h_A(ib, 2, 3) = -3.0;

      h_A(ib, 3, 0) = 0.0;
      h_A(ib, 3, 1) = 0.0;
      h_A(ib, 3, 2) = 0.0;
      h_A(ib, 3, 3) = 1.0;

      // Fill the reference matrix
      h_Ref(ib, 0, 0) = 13.0;
      h_Ref(ib, 0, 1) = 13.5;
      h_Ref(ib, 0, 2) = 19.0;
      h_Ref(ib, 0, 3) = 25.5;

      h_Ref(ib, 1, 0) = 0.0;
      h_Ref(ib, 1, 1) = 19.0;
      h_Ref(ib, 1, 2) = 16.5;
      h_Ref(ib, 1, 3) = 19.0;

      h_Ref(ib, 2, 0) = 0.0;
      h_Ref(ib, 2, 1) = 0.0;
      h_Ref(ib, 2, 2) = 19.0;
      h_Ref(ib, 2, 3) = 13.5;

      h_Ref(ib, 3, 0) = 0.0;
      h_Ref(ib, 3, 1) = 0.0;
      h_Ref(ib, 3, 2) = 0.0;
      h_Ref(ib, 3, 3) = 13.0;
    }
    Kokkos::deep_copy(A, h_A);
    Kokkos::deep_copy(x, h_x);
    Kokkos::deep_copy(y, h_y);

    // Compute A = A + alpha * (x * y^T) + alpha * (y * x^T)
    const double alpha = 1.5;
    ExecutionSpace exec;
    using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
    policy_type policy{exec, 0, Nb};
    Kokkos::parallel_for(
        "syr2", policy, KOKKOS_LAMBDA(int ib) {
          auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
          auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
          auto sub_y = Kokkos::subview(y, ib, Kokkos::ALL);

          // A = A + alpha * (x * y^T) + alpha * (y * x^T)
          KokkosBatched::SerialSyr2<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::Transpose>::invoke(alpha, sub_x,
                                                                                                         sub_y, sub_A);
        });

    // Confirm that the results are correct
    Kokkos::deep_copy(h_A, A);
    bool correct = true;
    double eps   = 1.0e-12;
    for (int ib = 0; ib < Nb; ib++) {
      for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
          if (Kokkos::abs(h_A(ib, i, j) - h_Ref(ib, i, j)) > eps) correct = false;
        }
      }
    }

    if (correct) {
      std::cout << "syr2 works correctly!" << std::endl;
    }
  }
  Kokkos::finalize();
}

output:

syr2 works correctly!