KokkosBatched::Xpay¶
Defined in header: KokkosBatched_Xpay.hpp
// Serial variant of the batched XPAY operation (Y = X + alpha * Y, per the
// description and example on this page). Unlike the Team/TeamVector variants
// it takes no team member argument.
struct SerialXpay {
// alpha: input view of scalar coefficients.
// X:     input view holding the batch of vectors to be added.
// Y:     input/output view holding the batch of vectors that is scaled and
//        updated in place. NOTE(review): Y is passed as a const reference;
//        presumably the view handle is const while its elements remain
//        writable (usual Kokkos::View semantics) - confirm.
// Returns an int; its meaning is not described on this page (presumably a
// status code with 0 for success - TODO confirm against the header).
template <typename ViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const alphaViewType &alpha,
const ViewType &X,
const ViewType &Y);
};
// Team variant of the batched XPAY operation (Y = X + alpha * Y, per the
// description and example on this page).
// MemberType: Kokkos TeamPolicy member type of the calling team.
// NOTE(review): presumably the work is shared among the threads of the team;
// how it differs from TeamVectorXpay is not stated here - confirm.
template <typename MemberType>
struct TeamXpay {
// member: team member handle of the calling team.
// alpha:  input view of scalar coefficients.
// X:      input view holding the batch of vectors to be added.
// Y:      input/output view holding the batch of vectors that is scaled and
//         updated in place.
// Returns an int; its meaning is not described on this page (presumably a
// status code with 0 for success - TODO confirm against the header).
template <typename ViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const alphaViewType &alpha,
const ViewType &X,
const ViewType &Y);
};
// TeamVector variant of the batched XPAY operation (Y = X + alpha * Y, per
// the description and example on this page). This is the variant used by the
// example program further down.
// MemberType: Kokkos TeamPolicy member type of the calling team.
// NOTE(review): presumably the work is shared among both the threads and the
// vector lanes of the team - confirm against the implementation.
template <typename MemberType>
struct TeamVectorXpay {
// member: team member handle of the calling team.
// alpha:  input view of scalar coefficients.
// X:      input view holding the batch of vectors to be added.
// Y:      input/output view holding the batch of vectors that is scaled and
//         updated in place.
// Returns an int; its meaning is not described on this page (presumably a
// status code with 0 for success - TODO confirm against the header).
template <typename ViewType, typename alphaViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const MemberType &member,
const alphaViewType &alpha,
const ViewType &X,
const ViewType &Y);
};
Performs batched XPAY operations on sets of vectors. For each vector pair in the batch, computes:
\[y_i \leftarrow x_i + \alpha_i y_i\]
where:
\(x_i\) and \(y_i\) are vectors in the i-th batch
\(\alpha_i\) is a scalar value for the i-th operation
The operation updates \(y_i\) in-place
This operation is similar to AXPY (\(y \leftarrow \alpha x + y\)), but with the scalar multiplier applied to Y instead of X. It is a variation of the BLAS Level 1 AXPY operation, implemented for batched execution.
Parameters¶
- member:
Team member handle obtained from the team execution policy (the Serial variant does not take this parameter)
- alpha:
Input view containing scalar coefficients
- X:
Input view containing batch of vectors to be added
- Y:
Input/output view containing batch of vectors to be scaled and updated
Type Requirements¶
MemberType
must be a Kokkos TeamPolicy member type
ViewType
must be:
Rank-2 Kokkos View with dimensions (batch_size, vector_length)
Value type that supports multiplication and addition
alphaViewType
must be:
Rank-1 Kokkos View with dimension (batch_size)
Value type compatible with ViewType elements for multiplication
Example¶
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <Kokkos_Core.hpp>
#include <KokkosBatched_Xpay.hpp>
// Execution space the example runs on (the Kokkos default).
using execution_space = Kokkos::DefaultExecutionSpace;
// Memory space belonging to that execution space.
using memory_space = execution_space::memory_space;
// Execution/memory space pair passed as the device argument of the Views
// created in main().
using device_type = Kokkos::Device<execution_space, memory_space>;
// Scalar type to use
using scalar_type = double;
// Example driver for KokkosBatched::TeamVectorXpay.
//
// Fills a batch of vector pairs (X = 2, Y = 4) with alpha = 3, runs the batched
// XPAY (Y = X + alpha * Y) with one team per batch entry, and checks the first
// few entries of the first vector against the expected value 14. It then
// computes a hand-written AXPY with the roles of X and Y swapped and checks
// that it yields the same values.
//
// Returns 0 when every check passes and 1 otherwise.
int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  // Declared outside the scope below so that the result is still available
  // after all Views have been destroyed and Kokkos has been finalized.
  bool correct = true;
  {
    // Define dimensions
    int batch_size    = 1000;  // Number of vector pairs
    int vector_length = 128;   // Length of each vector

    // Create views for batched vectors and alpha values
    Kokkos::View<scalar_type**, Kokkos::LayoutRight, device_type>
        X("X", batch_size, vector_length),
        Y("Y", batch_size, vector_length);
    Kokkos::View<scalar_type*, Kokkos::LayoutRight, device_type>
        alpha("alpha", batch_size);

    // Fill vectors with data
    Kokkos::RangePolicy<execution_space> policy(0, batch_size);
    Kokkos::parallel_for("init_data", policy, KOKKOS_LAMBDA(const int i) {
      // Set alpha value for this batch
      alpha(i) = 3.0;
      // Initialize the i-th vector pair
      for (int j = 0; j < vector_length; ++j) {
        X(i, j) = 2.0;
        Y(i, j) = 4.0;
      }
    });
    Kokkos::fence();

    // Perform batched XPAY using TeamPolicy with TeamVector
    using team_policy_type = Kokkos::TeamPolicy<execution_space>;
    team_policy_type policy_team(batch_size, Kokkos::AUTO, Kokkos::AUTO);
    Kokkos::parallel_for("batched_xpay", policy_team,
      KOKKOS_LAMBDA(const typename team_policy_type::member_type& member) {
        // Get batch index from the league rank (one team per batch entry)
        const int i = member.league_rank();

        // Extract batch slices. Range slices (rather than a plain index) keep
        // the ranks required by the Type Requirements: X_i and Y_i stay rank-2
        // with extents (1, vector_length) and alpha_i stays rank-1 with
        // extent (1).
        auto X_i     = Kokkos::subview(X, Kokkos::make_pair(i, i + 1), Kokkos::ALL());
        auto Y_i     = Kokkos::subview(Y, Kokkos::make_pair(i, i + 1), Kokkos::ALL());
        auto alpha_i = Kokkos::subview(alpha, Kokkos::make_pair(i, i + 1));

        // Perform XPAY using TeamVector variant
        KokkosBatched::TeamVectorXpay<typename team_policy_type::member_type>
            ::invoke(member, alpha_i, X_i, Y_i);
      }
    );
    Kokkos::fence();

    // Copy results to host for verification
    auto Y_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Y);

    // Verify the first vector's results
    // Expected: Y = X + alpha*Y = 2.0 + 3.0*4.0 = 2.0 + 12.0 = 14.0
    const double expected_value = 14.0;
    printf("Verifying XPAY results:\n");
    for (int j = 0; j < std::min(5, vector_length); ++j) {
      printf("  Y(0,%d) = %.1f\n", j, Y_host(0, j));
      if (std::abs(Y_host(0, j) - expected_value) > 1e-10) {
        printf("  ERROR: Value mismatch at element %d\n", j);
        correct = false;
      }
    }
    if (correct) {
      printf("Verification successful: Y = X + alpha*Y correctly computed\n");
    }

    // Compare with AXPY (y = alpha*x + y) for educational purposes
    Kokkos::View<scalar_type**, Kokkos::LayoutRight, device_type>
        X2("X2", batch_size, vector_length),
        Y2("Y2", batch_size, vector_length);

    // Initialize vectors for AXPY
    Kokkos::parallel_for("init_axpy_data", policy, KOKKOS_LAMBDA(const int i) {
      for (int j = 0; j < vector_length; ++j) {
        X2(i, j) = 4.0;  // Same as Y in XPAY example
        Y2(i, j) = 2.0;  // Same as X in XPAY example
      }
    });
    Kokkos::fence();

    // Perform "mock" AXPY manually (just to show the difference)
    Kokkos::parallel_for("manual_axpy", policy, KOKKOS_LAMBDA(const int i) {
      for (int j = 0; j < vector_length; ++j) {
        Y2(i, j) = alpha(i) * X2(i, j) + Y2(i, j);
        // Result: Y2 = alpha*X2 + Y2 = 3.0*4.0 + 2.0 = 12.0 + 2.0 = 14.0
      }
    });
    Kokkos::fence();

    // Copy AXPY results to host for comparison
    auto Y2_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), Y2);

    printf("\nComparing with AXPY results:\n");
    printf("  XPAY: Y = X + alpha*Y = 2.0 + 3.0*4.0 = 14.0\n");
    printf("  AXPY: Y = alpha*X + Y = 3.0*4.0 + 2.0 = 14.0\n");

    // Only claim that the results agree after actually comparing them.
    bool axpy_matches = true;
    for (int j = 0; j < std::min(5, vector_length); ++j) {
      if (std::abs(Y2_host(0, j) - Y_host(0, j)) > 1e-10) {
        printf("  ERROR: AXPY/XPAY mismatch at element %d\n", j);
        axpy_matches = false;
        correct      = false;
      }
    }
    if (axpy_matches) {
      printf("  Same result with parameters swapped\n");
    }
  }
  Kokkos::finalize();
  // Report failures through the exit status so scripts and CI can detect them.
  return correct ? 0 : 1;
}