KokkosBatched::Tbsv¶
Defined in header: KokkosBatched_Tbsv.hpp
template <typename ArgUplo, typename ArgTrans, typename ArgDiag, typename ArgAlgo>
struct SerialTbsv {
template <typename AViewType, typename XViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const XViewType &X, const int k);
};
Solves a system of the linear equations \(A \cdot X = B\) or \(A^T \cdot X = B\) or \(A^H \cdot X = B\) where \(A\) is an n-by-n unit or non-unit, upper or lower triangular band matrix with \((k + 1)\) diagonals.
For a real band matrix \(A\), this solves a system of the linear equations \(A \cdot X = B\) or \(A^T \cdot X = B\). This operation is equivalent to the BLAS routine
STBSV
orDTBSV
for single or double precision.For a complex band matrix \(A\), this solves a system of the linear equations \(A \cdot X = B\) or \(A^T \cdot X = B\) or \(A^H \cdot X = B\). This operation is equivalent to the BLAS routine
CTBSV
orZTBSV
for single or double precision.
Note
No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.
Parameters¶
- A:
Input view containing the upper or lower triangular band matrix. See LAPACK reference for the band storage format.
- X:
Input/output view containing the right-hand side on input and the solution on output.
- k:
The number of superdiagonals or subdiagonals within the band of \(A\). \(k >= 0\)
Type Requirements¶
ArgUplo
must be one of the following:KokkosBatched::Uplo::Upper
for upper triangular solveKokkosBatched::Uplo::Lower
for lower triangular solve
ArgTrans
must be one of the following:KokkosBatched::Trans::NoTranspose
to solve a system \(A \cdot X = B\)KokkosBatched::Trans::Transpose
to solve a system \(A^T \cdot X = B\)KokkosBatched::Trans::ConjTranspose
to solve a system \(A^H \cdot X = B\)
ArgDiag
must be one of the following:KokkosBatched::Diag::Unit
for the unit triangular matrix \(A\)KokkosBatched::Diag::NonUnit
for the non-unit triangular matrix \(A\)
ArgAlgo
must beKokkosBatched::Algo::tbsv::Unblocked
for the unblocked algorithmAViewType
must be a Kokkos View of rank 2 containing the band matrix AXViewType
must be a Kokkos View of rank 1 containing the right-hand side that satisfies -std::is_same_v<typename XViewType::value_type, typename XViewType::non_const_value_type> == true
Example¶
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// SPDX-FileCopyrightText: Copyright Contributors to the Kokkos project
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Tbsv.hpp>
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
/// \brief Example of batched tbsv
/// Solving A * x = b, where
/// A: [[1, 1, 1],
/// [0, 2, 2],
/// [0, 0, 3]]
/// b: [1, 1, 1]
/// x: [1/2, 1/6, 1/3]
///
/// In upper band storage,
/// Ab: [[0, 0, 1],
/// [0, 1, 2],
/// [1, 2, 3]]
///
/// This corresponds to the following system of equations:
/// x0 + x1 + x2 = 1
/// 2 x1 + 2 x2 = 1
/// 3 x2 = 1
///
int main(int /*argc*/, char** /*argv*/) {
Kokkos::initialize();
{
using View2DType = Kokkos::View<double**, ExecutionSpace>;
using View3DType = Kokkos::View<double***, ExecutionSpace>;
const int Nb = 10, n = 3, k = 2;
// Matrix A in banded storage
View3DType Ab("Ab", Nb, k + 1, n);
// Vector x
View2DType x("x", Nb, n);
// Upper triangular matrix
// Initialize Ab and x
Kokkos::deep_copy(x, 1.0);
auto h_Ab = Kokkos::create_mirror_view(Ab);
// Upper triangular matrix
for (int ib = 0; ib < Nb; ib++) {
// Fill the diagonal elements
h_Ab(ib, 2, 0) = 1.0;
h_Ab(ib, 2, 1) = 2.0;
h_Ab(ib, 2, 2) = 3.0;
// Fill the super-diagonal elements
h_Ab(ib, 1, 1) = 1.0;
h_Ab(ib, 1, 2) = 2.0;
h_Ab(ib, 0, 2) = 1.0;
}
Kokkos::deep_copy(Ab, h_Ab);
// solve A * x = b with tbsv
ExecutionSpace exec;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
policy_type policy{exec, 0, Nb};
Kokkos::parallel_for(
"tbsv", policy, KOKKOS_LAMBDA(int ib) {
auto sub_Ab = Kokkos::subview(Ab, ib, Kokkos::ALL, Kokkos::ALL);
auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
// Solve A * x = b with tbsv
KokkosBatched::SerialTbsv<KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
KokkosBatched::Diag::NonUnit, KokkosBatched::Algo::Tbsv::Unblocked>::invoke(sub_Ab,
sub_x,
k);
});
// Confirm that the results are correct
auto h_x = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, x);
bool correct = true;
double eps = 1.0e-12;
for (int ib = 0; ib < Nb; ib++) {
if (Kokkos::abs(h_x(ib, 0) - 1.0 / 2.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 1) - 1.0 / 6.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 2) - 1.0 / 3.0) > eps) correct = false;
}
if (correct) {
std::cout << "tbsv works correctly!" << std::endl;
}
}
Kokkos::finalize();
}
output:
tbsv works correctly!