KokkosBatched::Gbtrs

Defined in header: KokkosBatched_Gbtrs.hpp

template <typename ArgTrans, typename ArgAlgo>
struct SerialGbtrs {
  template <typename AViewType, typename PivViewType, typename BViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A, const PivViewType &piv, const BViewType &b, const int kl,
                                           const int ku);
};

Solves a system of the linear equations \(A \cdot X = B\) or \(A^T \cdot X = B\) or \(A^H \cdot X = B\) with a general n-by-n band matrix \(A\) using \(LU\) factorization computed by Gbtrf. This operation is equivalent to the LAPACK routine SGBTRS (CGBTRS) or DGBTRS (ZGBTRS) for single or double precision for real (complex) matrix.

Parameters

A:

Input view containing the factors \(L\) and \(U\) from the factorization \(A = P \cdot L \cdot U\) of the band matrix \(A\) computed by Gbtrf. See LAPACK reference for the band storage format.

piv:

The pivot indices computed by Gbtrf.

B:

Input/output view containing the right-hand side on input and the solution on output.

kl:

The number of subdiagonals within the band of \(A\). kl >= 0

ku:

The number of superdiagonals within the band of \(A\). ku >= 0

Type Requirements

  • ArgTrans must be one of the following:
    • KokkosBatched::Trans::NoTranspose to solve a system \(A \cdot X = B\)

    • KokkosBatched::Trans::Transpose to solve a system \(A^T \cdot X = B\)

    • KokkosBatched::Trans::ConjTranspose to solve a system \(A^H \cdot X = B\)

  • ArgAlgo must be KokkosBatched::Algo::Gbtrs::Unblocked for the unblocked algorithm

  • AViewType must be a Kokkos View of rank 2 containing the general band matrix \(A\)

  • PivViewType must be a Kokkos View of rank 1 containing the pivot indices

  • BViewType must be a Kokkos View of rank 1 containing the right-hand side that satisfies - std::is_same_v<typename BViewType::value_type, typename BViewType::non_const_value_type> == true

Example

//@HEADER
// ************************************************************************
//
//                        Kokkos v. 4.0
//       Copyright (2022) National Technology & Engineering
//               Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>
#include <KokkosBatched_Gbtrf.hpp>
#include <KokkosBatched_Gbtrs.hpp>

using ExecutionSpace = Kokkos::DefaultExecutionSpace;

/// \brief Example of batched gbtrf/gbtrs
/// Solving A * x = b, where
///   A: [[1, -3, -2,  0],
///       [-1, 1, -3, -2],
///       [2, -1,  1, -3],
///       [0,  2, -1,  1]]
///   b: [1, 1, 1, 1]
///   x: [67/81, 22/81, -40/81, -1/27]
///
/// In band storage,
///   Ab: [[0,   0,  0,  0],
///        [0,   0,  0,  0],
///        [0,   0, -2, -2],
///        [0,  -3, -3, -3],
///        [1,   1,  1,  1],
///        [-1, -1, -1,  0],
///        [2,   2,  0,  0]]
///
/// This corresponds to the following system of equations:
///      x0 - 3 x1 - 2 x2        = 1
///    - x0 +   x1 - 3 x2 - 2 x3 = 1
///    2 x0 -   x1 +   x3 - 3 x3 = 1
///           2 x1 -   x2 +   x3 = 1
///
int main(int /*argc*/, char** /*argv*/) {
  Kokkos::initialize();
  {
    using View2DType  = Kokkos::View<double**, ExecutionSpace>;
    using View3DType  = Kokkos::View<double***, ExecutionSpace>;
    using PivViewType = Kokkos::View<int**, ExecutionSpace>;
    const int Nb = 10, n = 4, kl = 2, ku = 2;
    const int ldab = 2 * kl + ku + 1;

    // Matrix Ab in band storage
    View3DType Ab("Ab", Nb, ldab, n);

    // Solution
    View2DType x("x", Nb, n);

    // Pivot
    PivViewType ipiv("ipiv", Nb, n);

    // Initialize Ab and x
    Kokkos::deep_copy(x, 1.0);
    auto h_Ab = Kokkos::create_mirror_view(Ab);

    // Upper triangular matrix
    for (int ib = 0; ib < Nb; ib++) {
      // Fill the band matrix
      h_Ab(ib, 2, 2) = -2;
      h_Ab(ib, 2, 3) = -2;
      h_Ab(ib, 3, 1) = -3;
      h_Ab(ib, 3, 2) = -3;
      h_Ab(ib, 3, 3) = -3;
      h_Ab(ib, 4, 0) = 1;
      h_Ab(ib, 4, 1) = 1;
      h_Ab(ib, 4, 2) = 1;
      h_Ab(ib, 4, 3) = 1;
      h_Ab(ib, 5, 0) = -1;
      h_Ab(ib, 5, 1) = -1;
      h_Ab(ib, 5, 2) = -1;
      h_Ab(ib, 6, 0) = 2;
      h_Ab(ib, 6, 1) = 2;
    }
    Kokkos::deep_copy(Ab, h_Ab);

    // solve A * x = b with gbtrf and gbtrs
    ExecutionSpace exec;
    using policy_type = Kokkos::RangePolicy<ExecutionSpace, Kokkos::IndexType<int>>;
    policy_type policy{exec, 0, Nb};
    Kokkos::parallel_for(
        "gbtrf-gbtrs", policy, KOKKOS_LAMBDA(int ib) {
          auto sub_Ab   = Kokkos::subview(Ab, ib, Kokkos::ALL, Kokkos::ALL);
          auto sub_ipiv = Kokkos::subview(ipiv, ib, Kokkos::ALL);
          auto sub_x    = Kokkos::subview(x, ib, Kokkos::ALL);

          // Factorize Ab by gbtrf
          KokkosBatched::SerialGbtrf<KokkosBatched::Algo::Gbtrf::Unblocked>::invoke(sub_Ab, sub_ipiv, kl, ku);

          // Solve A * x = b with gbtrs
          KokkosBatched::SerialGbtrs<KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gbtrs::Unblocked>::invoke(
              sub_Ab, sub_ipiv, sub_x, kl, ku);
        });

    // Confirm that the results are correct
    auto h_x     = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, x);
    bool correct = true;
    double eps   = 1.0e-12;
    for (int ib = 0; ib < Nb; ib++) {
      if (Kokkos::abs(h_x(ib, 0) - 67.0 / 81.0) > eps) correct = false;
      if (Kokkos::abs(h_x(ib, 1) - 22.0 / 81.0) > eps) correct = false;
      if (Kokkos::abs(h_x(ib, 2) + 40.0 / 81.0) > eps) correct = false;
      if (Kokkos::abs(h_x(ib, 3) + 1.0 / 27.0) > eps) correct = false;
    }

    if (correct) {
      std::cout << "gbtrf/gbtrs works correctly!" << std::endl;
    }
  }
  Kokkos::finalize();
}

output:

gbtrf/gbtrs works correctly!