KokkosBatched::Gbtrs¶
Defined in header: KokkosBatched_Gbtrs.hpp
template <typename ArgTrans, typename ArgAlgo>
struct SerialGbtrs {
template <typename AViewType, typename PivViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const PivViewType &piv, const BViewType &b, const int kl,
const int ku);
};
Solves a system of the linear equations \(A \cdot X = B\) or \(A^T \cdot X = B\) or \(A^H \cdot X = B\) with a general n-by-n band matrix \(A\) using \(LU\) factorization computed by Gbtrf
.
This operation is equivalent to the LAPACK routine SGBTRS
(CGBTRS
) or DGBTRS
(ZGBTRS
) for single or double precision for real (complex) matrix.
Parameters¶
- A:
Input view containing the factors \(L\) and \(U\) from the factorization \(A = P \cdot L \cdot U\) of the band matrix \(A\) computed by
Gbtrf
. See LAPACK reference for the band storage format.- piv:
The pivot indices computed by
Gbtrf
.- B:
Input/output view containing the right-hand side on input and the solution on output.
- kl:
The number of subdiagonals within the band of \(A\). kl >= 0
- ku:
The number of superdiagonals within the band of \(A\). ku >= 0
Type Requirements¶
ArgTrans
must be one of the following:KokkosBatched::Trans::NoTranspose
to solve a system \(A \cdot X = B\)KokkosBatched::Trans::Transpose
to solve a system \(A^T \cdot X = B\)KokkosBatched::Trans::ConjTranspose
to solve a system \(A^H \cdot X = B\)
ArgAlgo
must beKokkosBatched::Algo::Gbtrs::Unblocked
for the unblocked algorithmAViewType
must be a Kokkos View of rank 2 containing the general band matrix \(A\)PivViewType
must be a Kokkos View of rank 1 containing the pivot indicesBViewType
must be a Kokkos View of rank 1 containing the right-hand side that satisfies -std::is_same_v<typename BViewType::value_type, typename BViewType::non_const_value_type> == true
Example¶
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Gbtrf.hpp>
#include <KokkosBatched_Gbtrs.hpp>
using ExecutionSpace = Kokkos::DefaultExecutionSpace;
/// \brief Example of batched gbtrf/gbtrs
/// Solving A * x = b, where
/// A: [[1, -3, -2, 0],
/// [-1, 1, -3, -2],
/// [2, -1, 1, -3],
/// [0, 2, -1, 1]]
/// b: [1, 1, 1, 1]
/// x: [67/81, 22/81, -40/81, -1/27]
///
/// In band storage,
/// Ab: [[0, 0, 0, 0],
/// [0, 0, 0, 0],
/// [0, 0, -2, -2],
/// [0, -3, -3, -3],
/// [1, 1, 1, 1],
/// [-1, -1, -1, 0],
/// [2, 2, 0, 0]]
///
/// This corresponds to the following system of equations:
/// x0 - 3 x1 - 2 x2 = 1
/// - x0 + x1 - 3 x2 - 2 x3 = 1
/// 2 x0 - x1 + x3 - 3 x3 = 1
/// 2 x1 - x2 + x3 = 1
///
int main(int /*argc*/, char** /*argv*/) {
Kokkos::initialize();
{
using View2DType = Kokkos::View<double**, ExecutionSpace>;
using View3DType = Kokkos::View<double***, ExecutionSpace>;
using PivViewType = Kokkos::View<int**, ExecutionSpace>;
const int Nb = 10, n = 4, kl = 2, ku = 2;
const int ldab = 2 * kl + ku + 1;
// Matrix Ab in band storage
View3DType Ab("Ab", Nb, ldab, n);
// Solution
View2DType x("x", Nb, n);
// Pivot
PivViewType ipiv("ipiv", Nb, n);
// Initialize Ab and x
Kokkos::deep_copy(x, 1.0);
auto h_Ab = Kokkos::create_mirror_view(Ab);
// Upper triangular matrix
for (int ib = 0; ib < Nb; ib++) {
// Fill the band matrix
h_Ab(ib, 2, 2) = -2;
h_Ab(ib, 2, 3) = -2;
h_Ab(ib, 3, 1) = -3;
h_Ab(ib, 3, 2) = -3;
h_Ab(ib, 3, 3) = -3;
h_Ab(ib, 4, 0) = 1;
h_Ab(ib, 4, 1) = 1;
h_Ab(ib, 4, 2) = 1;
h_Ab(ib, 4, 3) = 1;
h_Ab(ib, 5, 0) = -1;
h_Ab(ib, 5, 1) = -1;
h_Ab(ib, 5, 2) = -1;
h_Ab(ib, 6, 0) = 2;
h_Ab(ib, 6, 1) = 2;
}
Kokkos::deep_copy(Ab, h_Ab);
// solve A * x = b with gbtrf and gbtrs
ExecutionSpace exec;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
policy_type policy{exec, 0, Nb};
Kokkos::parallel_for(
"gbtrf-gbtrs", policy, KOKKOS_LAMBDA(int ib) {
auto sub_Ab = Kokkos::subview(Ab, ib, Kokkos::ALL, Kokkos::ALL);
auto sub_ipiv = Kokkos::subview(ipiv, ib, Kokkos::ALL);
auto sub_x = Kokkos::subview(x, ib, Kokkos::ALL);
// Factorize Ab by gbtrf
KokkosBatched::SerialGbtrf<KokkosBatched::Algo::Gbtrf::Unblocked>::invoke(sub_Ab, sub_ipiv, kl, ku);
// Solve A * x = b with gbtrs
KokkosBatched::SerialGbtrs<KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gbtrs::Unblocked>::invoke(
sub_Ab, sub_ipiv, sub_x, kl, ku);
});
// Confirm that the results are correct
auto h_x = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, x);
bool correct = true;
double eps = 1.0e-12;
for (int ib = 0; ib < Nb; ib++) {
if (Kokkos::abs(h_x(ib, 0) - 67.0 / 81.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 1) - 22.0 / 81.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 2) + 40.0 / 81.0) > eps) correct = false;
if (Kokkos::abs(h_x(ib, 3) + 1.0 / 27.0) > eps) correct = false;
}
if (correct) {
std::cout << "gbtrf/gbtrs works correctly!" << std::endl;
}
}
Kokkos::finalize();
}
output:
gbtrf/gbtrs works correctly!