KokkosBatched::Spmv¶
Defined in header: KokkosBatched_Spmv.hpp
// Serial version
template <typename ArgTrans = Trans::NoTranspose>
struct SerialSpmv {
template <typename ValuesViewType, typename IntView, typename xViewType,
typename yViewType, typename alphaViewType, typename betaViewType, int dobeta>
KOKKOS_INLINE_FUNCTION
static int
invoke(const alphaViewType& alpha,
const ValuesViewType& values,
const IntView& row_ptr,
const IntView& colIndices,
const xViewType& x,
const betaViewType& beta,
const yViewType& Y);
};
// Team version
template <typename MemberType, typename ArgTrans = Trans::NoTranspose>
struct TeamSpmv {
template <typename ValuesViewType, typename IntView, typename xViewType,
typename yViewType, typename alphaViewType, typename betaViewType, int dobeta>
KOKKOS_INLINE_FUNCTION
static int
invoke(const MemberType& member,
const alphaViewType& alpha,
const ValuesViewType& values,
const IntView& row_ptr,
const IntView& colIndices,
const xViewType& x,
const betaViewType& beta,
const yViewType& y);
};
// TeamVector version
template <typename MemberType, typename ArgTrans = Trans::NoTranspose, unsigned N_team = 1>
struct TeamVectorSpmv {
template <typename ValuesViewType, typename IntView, typename xViewType,
typename yViewType, typename alphaViewType, typename betaViewType, int dobeta>
KOKKOS_INLINE_FUNCTION
static int
invoke(const MemberType& member,
const alphaViewType& alpha,
const ValuesViewType& values,
const IntView& row_ptr,
const IntView& colIndices,
const xViewType& x,
const betaViewType& beta,
const yViewType& y);
};
// Selective interface
template <typename MemberType, typename ArgTrans, typename ArgMode>
struct Spmv {
template <typename ValuesViewType, typename IntView, typename xViewType,
typename yViewType, typename alphaViewType, typename betaViewType, int dobeta>
KOKKOS_INLINE_FUNCTION
static int
invoke(const MemberType& member,
const alphaViewType& alpha,
const ValuesViewType& values,
const IntView& row_ptr,
const IntView& colIndices,
const xViewType& x,
const betaViewType& beta,
const yViewType& y);
};
The Spmv
(Sparse Matrix-Vector Multiplication) operation performs batched sparse matrix-vector multiplication for matrices sharing the same sparsity pattern. It computes:
where:
\(N\) is the number of matrices in the batch
\(A_1, \ldots, A_N\) are sparse matrices with the same sparsity pattern
\(x_1, \ldots, x_N\) are input vectors
\(y_1, \ldots, y_N\) are output vectors
\(\alpha_1, \ldots, \alpha_N\) are scaling factors for the matrix-vector products
\(\beta_1, \ldots, \beta_N\) are scaling factors for the input y vectors
The matrices are stored in Compressed Row Storage (CRS) format, where the sparsity pattern (row_ptr and colIndices) is shared across all matrices in the batch, but the values differ.
Parameters¶
- member:
Team execution policy instance (only for team/teamvector versions)
- alpha:
Scaling factor(s) for the matrix-vector product(s)
- values:
Values of the batched CRS matrix
- row_ptr:
Row pointers of the CRS format (shared across all matrices)
- colIndices:
Column indices of the CRS format (shared across all matrices)
- x:
Input vector(s)
- beta:
Scaling factor(s) for the input y vector(s)
- y:
Input/output vector(s)
Template Parameters¶
MemberType
must be a Kokkos TeamPolicy member typeArgTrans
must be the transpose option (typicallyKokkosBatched::Trans::NoTranspose
)ArgMode
must be one of:KokkosBatched::Mode::Serial
for serial executionKokkosBatched::Mode::Team
for team-based executionKokkosBatched::Mode::TeamVector
for team-vector-based execution
ValuesViewType
must be a rank-2 view with dimensions (batch_size, nnz)IntView
must be a rank-1 view for row pointers and column indicesxViewType
andyViewType
must be rank-2 views with dimensions (batch_size, n)alphaViewType
andbetaViewType
must be rank-1 views or scalar valuesdobeta
must be 0 (don’t use beta) or 1 (use beta)
Example¶
#include <Kokkos_Core.hpp>
#include <KokkosBatched_Spmv.hpp>
using execution_space = Kokkos::DefaultExecutionSpace;
using memory_space = execution_space::memory_space;
// Scalar type to use
using scalar_type = double;
int main(int argc, char* argv[]) {
Kokkos::initialize(argc, argv);
{
// Matrix dimensions
int batch_size = 10; // Number of matrices
int n = 100; // Size of each matrix
int nnz_per_row = 5; // Non-zeros per row
int nnz = n * nnz_per_row; // Total non-zeros
// Create batched matrix in CRS format
Kokkos::View<int*, memory_space> row_ptr("row_ptr", n+1);
Kokkos::View<int*, memory_space> col_idx("col_idx", nnz);
Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
values("values", batch_size, nnz);
// Create vectors
Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
x("x", batch_size, n);
Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
y("y", batch_size, n);
// Create alpha and beta
Kokkos::View<scalar_type*, memory_space> alpha("alpha", batch_size);
Kokkos::View<scalar_type*, memory_space> beta("beta", batch_size);
// Initialize on host
auto row_ptr_host = Kokkos::create_mirror_view(row_ptr);
auto col_idx_host = Kokkos::create_mirror_view(col_idx);
auto values_host = Kokkos::create_mirror_view(values);
auto x_host = Kokkos::create_mirror_view(x);
auto y_host = Kokkos::create_mirror_view(y);
auto alpha_host = Kokkos::create_mirror_view(alpha);
auto beta_host = Kokkos::create_mirror_view(beta);
// Initialize matrix sparsity pattern (shared across all matrices)
int nnz_count = 0;
for (int i = 0; i < n; ++i) {
row_ptr_host(i) = nnz_count;
// Add diagonal element
col_idx_host(nnz_count) = i;
nnz_count++;
// Add off-diagonal elements
for (int k = 1; k < nnz_per_row; ++k) {
int col = (i + k) % n; // Simple pattern
col_idx_host(nnz_count) = col;
nnz_count++;
}
}
row_ptr_host(n) = nnz_count; // Finalize row_ptr
// Initialize matrix values (different for each batch)
for (int b = 0; b < batch_size; ++b) {
for (int j = 0; j < nnz; ++j) {
// Diagonal elements are larger for stability
int row = 0;
while (j >= row_ptr_host(row+1)) row++;
if (col_idx_host(j) == row) {
values_host(b, j) = 10.0 + 0.1 * b; // Diagonal
} else {
values_host(b, j) = -1.0 + 0.05 * b; // Off-diagonal
}
}
}
// Initialize vectors and coefficients
for (int b = 0; b < batch_size; ++b) {
alpha_host(b) = 1.0 + 0.1 * b;
beta_host(b) = 0.5 + 0.05 * b;
for (int i = 0; i < n; ++i) {
x_host(b, i) = 1.0; // Simple vector
y_host(b, i) = 0.5; // Initial y value
}
}
// Copy to device
Kokkos::deep_copy(row_ptr, row_ptr_host);
Kokkos::deep_copy(col_idx, col_idx_host);
Kokkos::deep_copy(values, values_host);
Kokkos::deep_copy(x, x_host);
Kokkos::deep_copy(y, y_host);
Kokkos::deep_copy(alpha, alpha_host);
Kokkos::deep_copy(beta, beta_host);
// Save original y for verification
Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
y_orig("y_orig", batch_size, n);
Kokkos::deep_copy(y_orig, y);
// Create team policy
using policy_type = Kokkos::TeamPolicy<execution_space>;
int team_size = policy_type::team_size_recommended(
[](const int &, const int &) {},
Kokkos::ParallelForTag());
policy_type policy(batch_size, team_size);
// Perform batched SpMV with TeamVector mode (y = alpha*A*x + beta*y)
Kokkos::parallel_for("BatchedSpMV", policy,
KOKKOS_LAMBDA(const typename policy_type::member_type& member) {
const int b = member.league_rank();
// Get current batch's vectors
auto x_b = Kokkos::subview(x, b, Kokkos::ALL());
auto y_b = Kokkos::subview(y, b, Kokkos::ALL());
// Get current batch's values
auto values_b = Kokkos::subview(values, b, Kokkos::ALL());
// Perform SpMV: y = alpha*A*x + beta*y
KokkosBatched::Spmv<typename policy_type::member_type,
KokkosBatched::Trans::NoTranspose,
KokkosBatched::Mode::TeamVector>
::template invoke<decltype(values_b), decltype(row_ptr),
decltype(x_b), decltype(y_b), decltype(alpha),
decltype(beta), 1>
(member, alpha(b), values_b, row_ptr, col_idx, x_b, beta(b), y_b);
}
);
// Copy results back to host
Kokkos::deep_copy(y_host, y);
auto y_orig_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), y_orig);
// Verify results for first batch
int b = 0;
std::cout << "SpMV Results for batch " << b << ":" << std::endl;
std::cout << "alpha = " << alpha_host(b) << ", beta = " << beta_host(b) << std::endl;
// Print first few entries
std::cout << "Original y: [";
for (int i = 0; i < std::min(n, 5); ++i) {
std::cout << y_orig_host(b, i) << " ";
}
std::cout << "...]" << std::endl;
std::cout << "Result y: [";
for (int i = 0; i < std::min(n, 5); ++i) {
std::cout << y_host(b, i) << " ";
}
std::cout << "...]" << std::endl;
// In a real application, you would implement a proper verification
// by computing the expected result manually and comparing
}
Kokkos::finalize();
return 0;
}