KokkosBatched::Copy

Defined in header: KokkosBatched_Copy_Decl.hpp

/// \brief Serial interface for the batched copy \(B = op(A)\).
///
/// \tparam ArgTrans  Selects op(A): Trans::NoTranspose (A), Trans::Transpose (A^T),
///                   or Trans::ConjTranspose (A^H). Defaults to Trans::NoTranspose.
template <typename ArgTrans = Trans::NoTranspose>
struct SerialCopy {
  /// \tparam AViewType  Kokkos View of rank 1 or 2 holding A.
  /// \tparam BViewType  Kokkos View of rank 1 or 2 holding B; its value type must be non-const.
  ///
  /// \param A [in]   Source vector or matrix.
  /// \param B [out]  Destination vector or matrix, overwritten with op(A).
  ///
  /// \return int status code, matching the TeamCopy and TeamVectorCopy signatures
  ///         (NOTE(review): presumably 0 on success — confirm against the implementation).
  template <typename AViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const BViewType &B);
};

/// \brief Variant of the batched copy \(B = op(A)\) that takes a Kokkos team member handle.
///
/// \tparam MemberType  Kokkos team member handle type.
/// \tparam ArgTrans    Selects op(A): Trans::NoTranspose (A), Trans::Transpose (A^T),
///                     or Trans::ConjTranspose (A^H). Defaults to Trans::NoTranspose.
template <typename MemberType, typename ArgTrans = Trans::NoTranspose>
struct TeamCopy {
  /// \tparam AViewType  Kokkos View of rank 1 or 2 holding A.
  /// \tparam BViewType  Kokkos View of rank 1 or 2 holding B; its value type must be non-const.
  ///
  /// \param member [in]  Kokkos team member handle.
  /// \param A [in]       Source vector or matrix.
  /// \param B [out]      Destination vector or matrix, overwritten on output.
  ///
  /// \return int status code (NOTE(review): meaning is not stated here — presumably 0 on
  ///         success; confirm against the implementation).
  template <typename AViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B);
};

/// \brief Variant of the batched copy \(B = op(A)\) that takes a Kokkos team member handle.
///
/// NOTE(review): how this differs from TeamCopy in distributing work across the team is not
/// described here — confirm against the implementation.
///
/// \tparam MemberType  Kokkos team member handle type.
/// \tparam ArgTrans    Selects op(A): Trans::NoTranspose (A), Trans::Transpose (A^T),
///                     or Trans::ConjTranspose (A^H). Defaults to Trans::NoTranspose.
template <typename MemberType, typename ArgTrans = Trans::NoTranspose>
struct TeamVectorCopy {
  /// \tparam AViewType  Kokkos View of rank 1 or 2 holding A.
  /// \tparam BViewType  Kokkos View of rank 1 or 2 holding B; its value type must be non-const.
  ///
  /// \param member [in]  Kokkos team member handle.
  /// \param A [in]       Source vector or matrix.
  /// \param B [out]      Destination vector or matrix, overwritten on output.
  ///
  /// \return int status code (NOTE(review): meaning is not stated here — presumably 0 on
  ///         success; confirm against the implementation).
  template <typename AViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const AViewType &A, const BViewType &B);
};

Performs \(B = op(A)\) where \(op(A)\) is one of \(A\), \(A^T\), or \(A^H\).

  1. For real vectors \(A\) and \(B\), this operation is equivalent to the BLAS routine SCOPY or DCOPY for single or double precision, respectively.

  2. For complex vectors \(A\) and \(B\), this operation is equivalent to the BLAS routine CCOPY or ZCOPY for single or double precision, respectively.

Parameters

A:

On input, \(A\) is a length \(n\) vector or an \(m\) by \(n\) matrix.

B:

On input, \(B\) is a length \(n\) vector or an \(m\) by \(n\) matrix. On output, \(B\) is overwritten by \(op(A)\).

Type Requirements

  • MemberType must be a Kokkos team member handle (only for TeamCopy and TeamVectorCopy)

  • ArgTrans must be one of the following:
    • KokkosBatched::Trans::NoTranspose for \(op(A) = A\)

    • KokkosBatched::Trans::Transpose for \(op(A) = A^T\)

    • KokkosBatched::Trans::ConjTranspose for \(op(A) = A^H\)

  • AViewType must be a Kokkos View of rank 1 or 2 containing the vector or matrix \(A\)

  • BViewType must be a Kokkos View of rank 1 or 2 containing the vector or matrix \(B\) that satisfies std::is_same_v<typename BViewType::value_type, typename BViewType::non_const_value_type> (i.e., its value type must be non-const so that \(B\) can be written)

Note

This kernel supports both vector and matrix operations. When the input views \(A\) and \(B\) are of rank 1, the kernel performs a vector operation (BLAS copy). When the input views \(A\) and \(B\) are of rank 2, the kernel performs a matrix operation where the matrix \(A\) is copied to \(B\) with an optional transpose or conjugate transpose. The template argument that specifies the rank of the input views has been deprecated since version 5.1.0.

Example

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <iostream>

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Copy_Decl.hpp>

using ExecutionSpace = Kokkos::DefaultExecutionSpace;

/// \brief Example of batched copy
/// Performs B = Op(A) where Op is one of NoTranspose, Transpose, ConjTranspose
/// A and B are 1D or 2D views
///
/// Usage example: copy A to B
/// A: [1,2,3]
/// B: [1,2,3]
///
/// \brief Runs a batched copy B = A over Nb vectors of length n and verifies the result.
/// \return 0 when every copied entry matches the expected value, 1 otherwise.
int main(int /*argc*/, char** /*argv*/) {
  Kokkos::initialize();
  // Declared outside the inner scope so the exit code can be returned after finalize().
  bool correct = true;
  {
    // Inner scope: all views are declared here, so they are destroyed before
    // Kokkos::finalize() runs.
    using View2DType = Kokkos::View<double**, ExecutionSpace>;
    const int Nb = 10, n = 3;

    // Batches of vectors: row ib of A (resp. B) is the ib-th vector of length n
    View2DType A("A", Nb, n), B("B", Nb, n);

    // Host mirrors used to fill A and to read back B
    auto h_A = Kokkos::create_mirror_view(A);
    auto h_B = Kokkos::create_mirror_view(B);
    for (int ib = 0; ib < Nb; ib++) {
      // Fill vector A with [1, 2, ..., n]
      for (int i = 0; i < n; i++) {
        h_A(ib, i) = i + 1;
      }
    }
    Kokkos::deep_copy(A, h_A);

    // Compute B = A, one batch entry per parallel iteration
    ExecutionSpace exec;
    using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
    policy_type policy{exec, 0, Nb};
    Kokkos::parallel_for(
        "copy", policy, KOKKOS_LAMBDA(int ib) {
          // B(ib, :) = A(ib, :)
          auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL());
          auto sub_B = Kokkos::subview(B, ib, Kokkos::ALL());
          KokkosBatched::SerialCopy<KokkosBatched::Trans::NoTranspose>::invoke(sub_A, sub_B);
        });

    // Confirm that the results are correct: every entry of B must equal the
    // value written into A, i.e. B(ib, i) == i + 1.
    Kokkos::deep_copy(h_B, B);
    const double eps = 1.0e-12;
    for (int ib = 0; ib < Nb; ib++) {
      for (int i = 0; i < n; i++) {
        if (Kokkos::abs(h_B(ib, i) - (i + 1)) > eps) correct = false;
      }
    }

    if (correct) {
      std::cout << "copy works correctly!" << std::endl;
    } else {
      // Report the failure instead of exiting silently.
      std::cerr << "copy produced incorrect results!" << std::endl;
    }
  }
  Kokkos::finalize();
  return correct ? 0 : 1;
}

output:

copy works correctly!