KokkosBatched::Trsv¶
Defined in header: KokkosBatched_Trsv_Decl.hpp
template <typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct SerialTrsv {
template <typename ScalarType, typename AViewType, typename bViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const ScalarType alpha, const AViewType &A,
const bViewType &b);
};
template <typename MemberType, typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct TeamTrsv {
template <typename ScalarType, typename AViewType, typename bViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha,
const AViewType &A, const bViewType &b);
};
template <typename MemberType, typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct TeamVectorTrsv {
template <typename ScalarType, typename AViewType, typename bViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member, const ScalarType alpha,
const AViewType &A, const bViewType &b);
};
Solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\) or \(A^H \cdot X = \alpha B\) where \(A\) is an n-by-n unit or non-unit, upper or lower triangular matrix.
For a real matrix \(A\), this solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\). This operation is equivalent to the BLAS routine
STRSV
orDTRSV
for single or double precision.For a complex matrix \(A\), this solves a system of the linear equations \(A \cdot X = \alpha B\) or \(A^T \cdot X = \alpha B\) or \(A^H \cdot X = \alpha B\). This operation is equivalent to the BLAS routine
CTRSV
orZTRSV
for single or double precision.
Note
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
Parameters¶
- member:
Kokkos team member handle (only for
TeamTrsv
andTeamVectorTrsv
).- alpha:
Scalar multiplier for \(b\) (usually set to one).
- A:
Input view containing the upper or lower triangular matrix.
- b:
Input/output view containing the right-hand side on input and the solution on output.
Type Requirements¶
MemberType
must be a Kokkos team member handle (only forTeamTrsv
andTeamVectorTrsv
).ArgUplo
must be one of the following:KokkosBatched::Uplo::Upper
for upper triangular solveKokkosBatched::Uplo::Lower
for lower triangular solve
ArgTrans
must be one of the following:KokkosBatched::Trans::NoTranspose
to solve a system \(A \cdot X = \alpha B\)KokkosBatched::Trans::Transpose
to solve a system \(A^T \cdot X = \alpha B\)KokkosBatched::Trans::ConjTranspose
to solve a system \(A^H \cdot X = \alpha B\)
ArgDiag
must be one of the following:KokkosBatched::Diag::Unit
for the unit triangular matrix \(A\)KokkosBatched::Diag::NonUnit
for the non-unit triangular matrix \(A\)
ArgAlgo
must be one of the following:KokkosBatched::Algo::tbsv::Blocked
for the blocked algorithmKokkosBatched::Algo::tbsv::Unblocked
for the unblocked algorithm
ScalarType
must be a built-in arithmetic type likefloat
,double
,Kokkos::complex<float>
, orKokkos::complex<double>
.AViewType
must be a Kokkos View of rank 2 containing the band matrix AbViewType
must be a Kokkos View of rank 1 containing the right-hand side that satisfies -std::is_same_v<typename bViewType::value_type, typename bViewType::non_const_value_type> == true
Note
Some combinations of template parameters may not be supported yet.
Example¶
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Trsv_Decl.hpp>
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
/// \brief Example of batched trsv
/// Solving A * x = alpha * b, where
/// A: [[1, 0, 0],
/// [1, 2, 0],
/// [1, 2, 3]]
/// b: [1, 1, 1]
/// alpha: 1.5
///
/// x: [3/4, 1/4, 1/2]
///
/// In lower and transposed case
/// A^T: [[1, 1, 1],
/// [0, 2, 2],
/// [0, 0, 3]]
///
/// This corresponds to the following system of equations:
/// x0 + x1 + x2 = 1.5
/// 2 x1 + 2 x2 = 1.5
/// 3 x2 = 1.5
///
int main(int /*argc*/, char** /*argv*/) {
Kokkos::initialize();
{
using View2DType = Kokkos::View<double**, ExecutionSpace>;
using View3DType = Kokkos::View<double***, ExecutionSpace>;
const int Nb = 10, n = 3;
// Matrix A in banded storage
View3DType A("A", Nb, n, n);
// Vector x
View2DType x("x", Nb, n);
// Lower triangular matrix
// Initialize A and x
Kokkos::deep_copy(x, 1.0);
auto h_A = Kokkos::create_mirror_view(A);
// Upper triangular matrix
for (int ib = 0; ib < Nb; ib++) {
h_A(ib, 0, 0) = 1.0;
h_A(ib, 0, 1) = 0.0;
h_A(ib, 0, 2) = 0.0;
h_A(ib, 1, 0) = 1.0;
h_A(ib, 1, 1) = 2.0;
h_A(ib, 1, 2) = 0.0;
h_A(ib, 2, 0) = 1.0;
h_A(ib, 2, 1) = 2.0;
h_A(ib, 2, 2) = 3.0;
}
Kokkos::deep_copy(A, h_A);
// solve A^T * x = b with trsv
const double alpha = 1.5;
ExecutionSpace exec;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
policy_type policy{exec, 0, Nb};
Kokkos::parallel_for(
"trsv", policy, KOKKOS_LAMBDA(int ib) {
auto sub_A = Kokkos::subview(A, ib, Kokkos::ALL, Kokkos::ALL);
auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
// Solve A^T * x = alpha * b with trsv
KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Lower, KokkosBatched::Trans::Transpose,
KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Trsv::Unblocked>::invoke(alpha,
sub_A,
sub_x);
});
// Confirm that the results are correct
auto h_x = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, x);
bool correct = true;
double eps = 1.0e-12;
for (int ib = 0; ib < Nb; ib++) {
if (Kokkos::abs(h_x(ib, 0) - 3.0 / 4.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 1) - 1.0 / 4.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 2) - 1.0 / 2.0) > eps) correct = false;
}
if (correct) {
std::cout << "trsv works correctly!" << std::endl;
}
}
Kokkos::finalize();
}
output:
trsv works correctly!