KokkosBatched::Trsv

Defined in header: KokkosBatched_Trsv_Decl.hpp

template <typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct SerialTrsv {
  template <typename ScalarType, typename AViewType, typename bViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A,
                                           const bViewType &b);
};

template <typename MemberType, typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct TeamTrsv {
  template <typename ScalarType, typename AViewType, typename bViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha,
                                           const AViewType &A, const bViewType &b);
};

template <typename MemberType, typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct TeamVectorTrsv {
  template <typename ScalarType, typename AViewType, typename bViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha,
                                           const AViewType &A, const bViewType &b);
};

Solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\) or \(A^H \cdot X = \alpha B\) where \(A\) is an n-by-n unit or non-unit, upper or lower triangular matrix.

  1. For a real matrix \(A\), this solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\). This operation is equivalent to the BLAS routine STRSV or DTRSV for single or double precision.

  2. For a complex matrix \(A\), this solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\) or \(A^H \cdot X = \alpha B\). This operation is equivalent to the BLAS routine CTRSV or ZTRSV for single or double precision.

Note

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

Parameters

member:

Kokkos team member handle (only for TeamTrsv and TeamVectorTrsv).

alpha:

Scalar multiplier for \(b\) (usually set to one).

A:

Input view containing the upper or lower triangular matrix.

b:

Input/output view containing the right-hand side on input and the solution on output.

Type Requirements

  • MemberType must be a Kokkos team member handle (only for TeamTrsv and TeamVectorTrsv).

  • ArgUplo must be one of the following:
    • KokkosBatched::Uplo::Upper for upper triangular solve

    • KokkosBatched::Uplo::Lower for lower triangular solve

  • ArgTrans must be one of the following:
    • KokkosBatched::Trans::NoTranspose to solve a system \(A \cdot X = \alpha B\)

    • KokkosBatched::Trans::Transpose to solve a system \(A^T \cdot X = \alpha B\)

    • KokkosBatched::Trans::ConjTranspose to solve a system \(A^H \cdot X = \alpha B\)

  • ArgDiag must be one of the following:
    • KokkosBatched::Diag::Unit for the unit triangular matrix \(A\)

    • KokkosBatched::Diag::NonUnit for the non-unit triangular matrix \(A\)

  • ArgAlgo must be one of the following:
    • KokkosBatched::Algo::tbsv::Blocked for the blocked algorithm

    • KokkosBatched::Algo::tbsv::Unblocked for the unblocked algorithm

  • ScalarType must be a built-in arithmetic type like float, double, Kokkos::complex<float>, or Kokkos::complex<double>.

  • AViewType must be a Kokkos View of rank 2 containing the band matrix A

  • bViewType must be a Kokkos View of rank 1 containing the right-hand side that satisfies - std::is_same_v<typename bViewType::value_type, typename bViewType::non_const_value_type> == true

Note

Some combinations of template parameters may not be supported yet.

Example

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Trsv_Decl.hpp>

using ExecutionSpace = Kokkos::DefaultExecutionSpace;

/// \brief Example of batched trsv
/// Solving A * x = alpha * b, where
///   A: [[1,  0,  0],
///       [1,  2,  0],
///       [1,  2,  3]]
///   b: [1, 1, 1]
///   alpha: 1.5
///
///   x: [3/4, 1/4, 1/2]
///
/// In lower and transposed case
///  A^T: [[1,  1,  1],
///        [0,  2,  2],
///        [0,  0,  3]]
///
/// This corresponds to the following system of equations:
///      x0 +   x1 +   x2 = 1.5
///           2 x1 + 2 x2 = 1.5
///                  3 x2 = 1.5
///
int main(int /*argc*/, char** /*argv*/) {
  Kokkos::initialize();
  {
    using View2DType = Kokkos::View<double**, ExecutionSpace>;
    using View3DType = Kokkos::View<double***, ExecutionSpace>;
    const int Nb = 10, n = 3;

    // Matrix A in banded storage
    View3DType A("A", Nb, n, n);

    // Vector x
    View2DType x("x", Nb, n);

    // Lower triangular matrix
    // Initialize A and x
    Kokkos::deep_copy(x, 1.0);
    auto h_A = Kokkos::create_mirror_view(A);

    // Upper triangular matrix
    for (int ib = 0; ib < Nb; ib++) {
      h_A(ib, 0, 0) = 1.0;
      h_A(ib, 0, 1) = 0.0;
      h_A(ib, 0, 2) = 0.0;
      h_A(ib, 1, 0) = 1.0;
      h_A(ib, 1, 1) = 2.0;
      h_A(ib, 1, 2) = 0.0;
      h_A(ib, 2, 0) = 1.0;
      h_A(ib, 2, 1) = 2.0;
      h_A(ib, 2, 2) = 3.0;
    }
    Kokkos::deep_copy(A, h_A);

    // solve A^T * x = b with trsv
    const double alpha = 1.5;
    ExecutionSpace exec;
    using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
    policy_type policy{exec, 0, Nb};
    Kokkos::parallel_for(
        "trsv", policy, KOKKOS_LAMBDA(int ib) {
          auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
          auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);

          // Solve A^T * x = alpha * b with trsv
          KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Lower, KokkosBatched::Trans::Transpose,
                                    KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(alpha,
                                                                                                                sub_A,
                                                                                                                sub_x);
        });

    // Confirm that the results are correct
    auto h_x     = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, x);
    bool correct = true;
    double eps   = 1.0e-12;
    for (int ib = 0; ib < Nb; ib++) {
      if (Kokkos::abs(h_x(ib, 0) - 3.0 / 4.0) > eps) correct = false;
      if (Kokkos::abs(h_x(ib, 1) - 1.0 / 4.0) > eps) correct = false;
      if (Kokkos::abs(h_x(ib, 2) - 1.0 / 2.0) > eps) correct = false;
    }

    if (correct) {
      std::cout << "trsv works correctly!" << std::endl;
    }
  }
  Kokkos::finalize();
}

output:

trsv works correctly!