KokkosBatched::Ger

Defined in header: KokkosBatched_Ger.hpp

template <typename ArgAlgo>
struct SerialGer {
  template <typename ScalarType, typename XViewType, typename YViewType, typename AViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const XViewType &x, const YViewType &y,
                                           const AViewType &a);
};

Perform a rank-1 update of matrix \(A\) by vectors \(x\) and \(y\) with scaling factor \(alpha\)

\[\begin{split}\begin{align} A &= A + \alpha (x * y^T) \: \text{(if ArgTrans == KokkosBatched::Trans::Transpose)} \\ A &= A + \alpha (x * y^H) \: \text{(if ArgTrans == KokkosBatched::Trans::ConjTranspose)} \end{align}\end{split}\]
  1. For real vectors \(x\) and \(y\), this operation is equivalent to the BLAS routine SGER or DGER for single or double precision.

  2. For complex vectors \(x\) and \(y\) with ArgTrans == KokkosBatched::Trans::Transpose, this operation is equivalent to the BLAS routine CGERU or ZGERU for single or double precision.

  3. For complex vectors \(x\) and \(y\) with ArgTrans == KokkosBatched::Trans::ConjTranspose, this operation is equivalent to the BLAS routine CGERC or ZGERC for single or double precision.

Parameters

alpha:

Scaling factor of the rank-1 update

x:

On input, \(x\) is a length m vector.

y:

On input, \(y\) is a length n vector.

A:

On input, \(A\) is a m by n matrix being updated.

Type Requirements

  • ArgTrans must be one of the following:
    • KokkosBatched::Trans::Transpose to perform \(A = A + \alpha (x * y^T)\)

    • KokkosBatched::Trans::ConjTranspose to perform \(A = A + \alpha (x * y^H)\)

  • ScalarType must be a built-in floating point type (float, double, Kokkos::complex<float>, Kokkos::complex<double>)

  • XViewType must be a Kokkos View of rank 1 containing a vector \(X\)

  • YViewType must be a Kokkos View of rank 1 containing a vector \(Y\)

  • AViewType must be a Kokkos View of rank 2 containing a matrix \(A\)

Example

//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Ger.hpp>

using ExecutionSpace = Kokkos::DefaultExecutionSpace;

/// \brief Example of batched ger
/// Perform a rank-1 update of matrix A by vectors x and y, where
///   A: [[1, -3, -2,  0],
///       [-1, 1, -3, -2],
///       [2, -1,  1, -3]]
///   x: [1, 2, 3]
///   y: [0, 1, 2, 3]
///   alpha: 1.5
///
/// After, A = A + alpha * (x * y^T), it will give
///   A: [[ 1.,  -1.5,  1.,   4.5],
///       [-1.,   4.,   3.,   7. ],
///       [ 2.,   3.5, 10.,  10.5],]
///
int main(int /*argc*/, char** /*argv*/) {
  Kokkos::initialize();
  {
    using View2DType = Kokkos::View<double**, ExecutionSpace>;
    using View3DType = Kokkos::View<double***, ExecutionSpace>;
    const int Nb = 10, m = 3, n = 4;

    // Matrix A
    View3DType A("A", Nb, m, n), Ref("Ref", Nb, m, n);

    // vectros x and y
    View2DType x("x", Nb, m), y("y", Nb, n);

    // Initialize A and x
    Kokkos::deep_copy(x, 1.0);
    auto h_A   = Kokkos::create_mirror_view(A);
    auto h_x   = Kokkos::create_mirror_view(x);
    auto h_y   = Kokkos::create_mirror_view(y);
    auto h_Ref = Kokkos::create_mirror_view(Ref);

    // Upper triangular matrix
    for (int ib = 0; ib < Nb; ib++) {
      // Fill vectors x and y
      for (int i = 0; i < m; i++) {
        h_x(ib, i) = i + 1;
      }
      for (int i = 0; i < n; i++) {
        h_y(ib, i) = i;
      }

      // Fill the matrix A
      h_A(ib, 0, 0) = 1.0;
      h_A(ib, 0, 1) = -3.0;
      h_A(ib, 0, 2) = -2.0;
      h_A(ib, 0, 3) = 0.0;
      h_A(ib, 1, 0) = -1.0;
      h_A(ib, 1, 1) = 1.0;
      h_A(ib, 1, 2) = -3.0;
      h_A(ib, 1, 3) = -2.0;
      h_A(ib, 2, 0) = 2.0;
      h_A(ib, 2, 1) = -1.0;
      h_A(ib, 2, 2) = 1.0;
      h_A(ib, 2, 3) = -3.0;

      // Fill the reference matrix
      h_Ref(ib, 0, 0) = 1.0;
      h_Ref(ib, 0, 1) = -1.5;
      h_Ref(ib, 0, 2) = 1.0;
      h_Ref(ib, 0, 3) = 4.5;
      h_Ref(ib, 1, 0) = -1.0;
      h_Ref(ib, 1, 1) = 4.0;
      h_Ref(ib, 1, 2) = 3.0;
      h_Ref(ib, 1, 3) = 7.0;
      h_Ref(ib, 2, 0) = 2.0;
      h_Ref(ib, 2, 1) = 3.5;
      h_Ref(ib, 2, 2) = 10.0;
      h_Ref(ib, 2, 3) = 10.5;
    }
    Kokkos::deep_copy(A, h_A);
    Kokkos::deep_copy(x, h_x);
    Kokkos::deep_copy(y, h_y);

    // Compute A = A + alpha * (x * y^T)
    const double alpha = 1.5;
    ExecutionSpace exec;
    using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
    policy_type policy{exec, 0, Nb};
    Kokkos::parallel_for(
        "ger", policy, KOKKOS_LAMBDA(int ib) {
          auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
          auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
          auto sub_y = Kokkos::subview(y, ib, Kokkos::ALL);

          // A = A + alpha * (x * y^T)
          KokkosBatched::SerialGer<KokkosBatched::Trans::Transpose>::invoke(alpha, sub_x, sub_y, sub_A);
        });

    // Confirm that the results are correct
    Kokkos::deep_copy(h_A, A);
    bool correct = true;
    double eps   = 1.0e-12;
    for (int ib = 0; ib < Nb; ib++) {
      for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++) {
          if (Kokkos::abs(h_A(ib, i, j) - h_Ref(ib, i, j)) > eps) correct = false;
        }
      }
    }

    if (correct) {
      std::cout << "ger works correctly!" << std::endl;
    }
  }
  Kokkos::finalize();
}

output:

ger works correctly!