KokkosBatched::ApplyQ
Defined in header: KokkosBatched_ApplyQ_Decl.hpp
// Serial version (Left, NoTranspose)
//
// Serial variant: takes no team member handle and no transposition tag.
// Parameters (as described in the parameter list of this page):
//   A - input/output matrix to which Q will be applied (rank-2 view)
//   t - view of Householder scaling factors, tau (rank-1 view)
//   v - view of Householder vectors (rank-2 view)
//   w - workspace view (rank-1 view)
// Returns an int (presumably a status code -- TODO confirm).
// NOTE(review): the usage examples on this page invoke the operation through
// templated structs such as SerialApplyQ<Side, Trans, Algo>::invoke rather than
// a free function named ApplyQ; confirm the exact signature and the role of
// each argument against KokkosBatched_ApplyQ_Decl.hpp.
template <typename AViewType, typename tViewType, typename vViewType, typename wViewType>
KOKKOS_INLINE_FUNCTION
int
ApplyQ(const AViewType& A,
const tViewType& t,
const vViewType& v,
const wViewType& w);
// Serial version (Left, Transpose)
//
// Serial variant that additionally takes a transposition tag.
// Parameters (as described in the parameter list of this page):
//   tr - transposition indicator (typically Trans::Transpose)
//   A  - input/output matrix to which Q will be applied (rank-2 view)
//   t  - view of Householder scaling factors, tau (rank-1 view)
//   v  - view of Householder vectors (rank-2 view)
//   w  - workspace view (rank-1 view)
// Returns an int (presumably a status code -- TODO confirm).
// NOTE(review): the usage examples on this page select the transposition via a
// template argument of SerialApplyQ<Side, Trans, Algo> rather than a runtime
// `tr` argument; confirm against KokkosBatched_ApplyQ_Decl.hpp.
template <typename TrType, typename AViewType, typename tViewType, typename vViewType, typename wViewType>
KOKKOS_INLINE_FUNCTION
int
ApplyQ(const TrType tr,
const AViewType& A,
const tViewType& t,
const vViewType& v,
const wViewType& w);
// Team version (Left, NoTranspose)
//
// Team variant: takes a team member handle and no transposition tag.
// Parameters (as described in the parameter list of this page):
//   member - team execution policy member instance
//   A      - input/output matrix to which Q will be applied (rank-2 view)
//   t      - view of Householder scaling factors, tau (rank-1 view)
//   v      - view of Householder vectors (rank-2 view)
//   w      - workspace view (rank-1 view)
// Returns an int (presumably a status code -- TODO confirm).
// NOTE(review): the usage examples on this page invoke the team operation
// through a templated struct's static invoke(member, ...) rather than a free
// function named ApplyQ; confirm the exact signature and argument order
// against KokkosBatched_ApplyQ_Decl.hpp.
template <typename MemberType, typename AViewType, typename tViewType, typename vViewType, typename wViewType>
KOKKOS_INLINE_FUNCTION
int
ApplyQ(const MemberType& member,
const AViewType& A,
const tViewType& t,
const vViewType& v,
const wViewType& w);
// Team version (Left, Transpose)
//
// Team variant that additionally takes a transposition tag.
// Parameters (as described in the parameter list of this page):
//   member - team execution policy member instance
//   tr     - transposition indicator (typically Trans::Transpose)
//   A      - input/output matrix to which Q will be applied (rank-2 view)
//   t      - view of Householder scaling factors, tau (rank-1 view)
//   v      - view of Householder vectors (rank-2 view)
//   w      - workspace view (rank-1 view)
// Returns an int (presumably a status code -- TODO confirm).
// NOTE(review): the usage examples on this page select the transposition via a
// template argument rather than a runtime `tr` argument; confirm against
// KokkosBatched_ApplyQ_Decl.hpp.
template <typename MemberType, typename TrType, typename AViewType, typename tViewType, typename vViewType, typename wViewType>
KOKKOS_INLINE_FUNCTION
int
ApplyQ(const MemberType& member,
const TrType tr,
const AViewType& A,
const tViewType& t,
const vViewType& v,
const wViewType& w);
The ApplyQ operation applies an orthogonal matrix Q (obtained from a QR decomposition) to another matrix. Instead of forming Q explicitly, it applies Q using its factored representation as a sequence of Householder reflectors, which is more numerically stable and computationally efficient.
Mathematically, when applied from the left:
$$A \leftarrow Q A \quad \text{or} \quad A \leftarrow Q^T A$$
When applied from the right:
$$A \leftarrow A Q \quad \text{or} \quad A \leftarrow A Q^T$$
where Q is the orthogonal matrix implicitly represented by the Householder vectors v and scaling factors t, i.e. $Q = H_1 H_2 \cdots H_k$ with $H_i = I - t_i v_i v_i^T$.
Parameters
- member:
Team execution policy instance (only for team version)
- tr:
Optional parameter indicating transposition (typically Trans::Transpose)
- A:
Input/output matrix to which Q will be applied
- t:
View containing Householder scaling factors (tau)
- v:
View containing Householder vectors
- w:
Workspace view for computation
Type Requirements
- MemberType must be a Kokkos TeamPolicy member type
- TrType must be a transposition type (typically Trans::NoTranspose or Trans::Transpose)
- AViewType must be a rank-2 view representing the matrix to which Q is applied
- tViewType must be a rank-1 view containing Householder scaling factors
- vViewType must be a rank-2 view containing Householder vectors
- wViewType must be a rank-1 workspace view with sufficient size
Examples
#include <cmath>
#include <iostream>

#include <Kokkos_Core.hpp>
#include <KokkosBatched_ApplyQ_Decl.hpp>
#include <KokkosBatched_QR_Decl.hpp>

using execution_space = Kokkos::DefaultExecutionSpace;
using memory_space    = execution_space::memory_space;

// Scalar type to use
using scalar_type = double;

// Serial example: QR-factorize a small matrix, apply Q and then Q^T to a second
// matrix C, and check that C is recovered (Q^T * Q = I).
int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    // Define matrix dimensions
    const int n = 5;  // Matrix rows
    const int m = 3;  // Matrix columns
    const int k = 2;  // Number of columns in C

    // Create views for matrices and vectors
    Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
        A("A", n, m),  // Matrix for QR factorization
        v("v", n, m),  // Householder vectors from QR factorization
        C("C", n, k);  // Matrix to apply Q to
    Kokkos::View<scalar_type*, memory_space>
        t("t", m),  // Householder scalars (tau)
        w("w", n);  // Workspace shared by the QR and ApplyQ calls

    // Fill A with data
    auto A_host = Kokkos::create_mirror_view(A);
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < m; ++j) {
        A_host(i, j) = (i + 1) * 0.1 + (j + 1) * 0.01;
      }
    }
    Kokkos::deep_copy(A, A_host);

    // Fill C with data
    auto C_host = Kokkos::create_mirror_view(C);
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < k; ++j) {
        C_host(i, j) = (i + 1) + (j + 1) * 10;
      }
    }
    Kokkos::deep_copy(C, C_host);

    // Copy A to v; the factorization overwrites v in place
    Kokkos::deep_copy(v, A);

    // Perform QR factorization to get Householder vectors and scaling factors.
    // The workspace view is a required argument of invoke().
    Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int /*i*/) {
      KokkosBatched::SerialQR<KokkosBatched::Algo::QR::Unblocked>::invoke(v, t, w);
    });

    // Save a copy of C for verification
    Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space> C_orig("C_orig", n, k);
    Kokkos::deep_copy(C_orig, C);

    // Apply Q from the left to C.
    // Argument order is (factored matrix, tau, matrix to update, workspace):
    // the Householder vectors come first, the matrix being modified third.
    Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int /*i*/) {
      // C = Q * C
      KokkosBatched::SerialApplyQ<KokkosBatched::Side::Left,
                                  KokkosBatched::Trans::NoTranspose,
                                  KokkosBatched::Algo::ApplyQ::Unblocked>::invoke(v, t, C, w);
    });

    // Apply Q^T from the left to revert back to original C
    Kokkos::parallel_for(1, KOKKOS_LAMBDA(const int /*i*/) {
      // C = Q^T * C
      KokkosBatched::SerialApplyQ<KokkosBatched::Side::Left,
                                  KokkosBatched::Trans::Transpose,
                                  KokkosBatched::Algo::ApplyQ::Unblocked>::invoke(v, t, C, w);
    });

    // Verify that applying Q followed by Q^T returns to the original matrix
    Kokkos::deep_copy(C_host, C);
    auto C_orig_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C_orig);

    // Check if C approximately matches C_orig; report every mismatching entry
    bool test_passed = true;
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < k; ++j) {
        if (std::abs(C_host(i, j) - C_orig_host(i, j)) > 1e-10) {
          test_passed = false;
          std::cout << "Mismatch at (" << i << ", " << j << "): "
                    << C_host(i, j) << " vs " << C_orig_host(i, j) << '\n';
        }
      }
    }

    if (test_passed) {
      std::cout << "ApplyQ test: PASSED" << '\n';
    } else {
      std::cout << "ApplyQ test: FAILED" << '\n';
    }
  }
  Kokkos::finalize();
  return 0;
}
Batched Example with Team Version
#include <cmath>
#include <iostream>

#include <Kokkos_Core.hpp>
#include <KokkosBatched_ApplyQ_Decl.hpp>
#include <KokkosBatched_QR_Decl.hpp>

using execution_space = Kokkos::DefaultExecutionSpace;
using memory_space    = execution_space::memory_space;

// Scalar type to use
using scalar_type = double;

// Batched example: one team per matrix. Each team QR-factorizes its matrix,
// applies Q and then Q^T to its C, and the host checks that C is recovered.
int main(int argc, char* argv[]) {
  Kokkos::initialize(argc, argv);
  {
    // Define dimensions
    const int batch_size = 10;  // Number of matrices
    const int n          = 5;   // Matrix rows
    const int m          = 3;   // Matrix columns
    const int k          = 2;   // Number of columns in C

    // Create batched views
    Kokkos::View<scalar_type***, Kokkos::LayoutRight, memory_space>
        A("A", batch_size, n, m),  // Matrices for QR factorization
        v("v", batch_size, n, m),  // Householder vectors
        C("C", batch_size, n, k);  // Matrices to apply Q to
    Kokkos::View<scalar_type**, memory_space>
        t("t", batch_size, m);  // Householder scalars (tau)
    // LayoutRight keeps each batch's workspace row contiguous in memory.
    // NOTE(review): the kernels appear to treat the workspace as a contiguous
    // buffer; with the device-default layout the per-batch rows could be
    // strided and overlap -- confirm against the implementation headers.
    Kokkos::View<scalar_type**, Kokkos::LayoutRight, memory_space>
        w("w", batch_size, n);  // Workspaces

    // Fill matrices with data
    auto A_host = Kokkos::create_mirror_view(A);
    auto C_host = Kokkos::create_mirror_view(C);
    for (int b = 0; b < batch_size; ++b) {
      for (int i = 0; i < n; ++i) {
        for (int j = 0; j < m; ++j) {
          A_host(b, i, j) = (b + 1) * 0.01 + (i + 1) * 0.1 + (j + 1) * 0.01;
        }
        for (int j = 0; j < k; ++j) {
          C_host(b, i, j) = (b + 1) * 0.1 + (i + 1) + (j + 1) * 10;
        }
      }
    }
    Kokkos::deep_copy(A, A_host);
    Kokkos::deep_copy(C, C_host);

    // Copy A to v; the factorization overwrites v in place
    Kokkos::deep_copy(v, A);

    // Save copy of C for verification
    Kokkos::View<scalar_type***, Kokkos::LayoutRight, memory_space>
        C_orig("C_orig", batch_size, n, k);
    Kokkos::deep_copy(C_orig, C);

    // Create team policy: one team per matrix in the batch
    using policy_type = Kokkos::TeamPolicy<execution_space>;
    using member_type = policy_type::member_type;
    policy_type policy(batch_size, Kokkos::AUTO);

    // Perform QR factorization.
    // NOTE(review): the TeamVector variants are used here because the plain
    // Team variants of QR/ApplyQ are believed to be unimplemented stubs;
    // confirm against KokkosBatched_QR_Decl.hpp / KokkosBatched_ApplyQ_Decl.hpp.
    Kokkos::parallel_for("QR_factorization", policy,
      KOKKOS_LAMBDA(const member_type& member) {
        const int b = member.league_rank();
        auto v_b = Kokkos::subview(v, b, Kokkos::ALL(), Kokkos::ALL());
        auto t_b = Kokkos::subview(t, b, Kokkos::ALL());
        auto w_b = Kokkos::subview(w, b, Kokkos::ALL());

        // The workspace view is a required argument of invoke().
        KokkosBatched::TeamVectorQR<member_type,
                                    KokkosBatched::Algo::QR::Unblocked>
          ::invoke(member, v_b, t_b, w_b);
      }
    );

    // Apply Q to C: C_b = Q_b * C_b.
    // Argument order is (member, factored matrix, tau, matrix to update, workspace).
    Kokkos::parallel_for("Apply_Q", policy,
      KOKKOS_LAMBDA(const member_type& member) {
        const int b = member.league_rank();
        auto v_b = Kokkos::subview(v, b, Kokkos::ALL(), Kokkos::ALL());
        auto t_b = Kokkos::subview(t, b, Kokkos::ALL());
        auto C_b = Kokkos::subview(C, b, Kokkos::ALL(), Kokkos::ALL());
        auto w_b = Kokkos::subview(w, b, Kokkos::ALL());

        KokkosBatched::TeamVectorApplyQ<member_type,
                                        KokkosBatched::Side::Left,
                                        KokkosBatched::Trans::NoTranspose,
                                        KokkosBatched::Algo::ApplyQ::Unblocked>
          ::invoke(member, v_b, t_b, C_b, w_b);
      }
    );

    // Apply Q^T to C (to verify): C_b = Q_b^T * C_b
    Kokkos::parallel_for("Apply_QT", policy,
      KOKKOS_LAMBDA(const member_type& member) {
        const int b = member.league_rank();
        auto v_b = Kokkos::subview(v, b, Kokkos::ALL(), Kokkos::ALL());
        auto t_b = Kokkos::subview(t, b, Kokkos::ALL());
        auto C_b = Kokkos::subview(C, b, Kokkos::ALL(), Kokkos::ALL());
        auto w_b = Kokkos::subview(w, b, Kokkos::ALL());

        KokkosBatched::TeamVectorApplyQ<member_type,
                                        KokkosBatched::Side::Left,
                                        KokkosBatched::Trans::Transpose,
                                        KokkosBatched::Algo::ApplyQ::Unblocked>
          ::invoke(member, v_b, t_b, C_b, w_b);
      }
    );

    // Verify results: stop at the first mismatching entry
    Kokkos::deep_copy(C_host, C);
    auto C_orig_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), C_orig);
    bool test_passed = true;
    for (int b = 0; b < batch_size; ++b) {
      for (int i = 0; i < n; ++i) {
        for (int j = 0; j < k; ++j) {
          if (std::abs(C_host(b, i, j) - C_orig_host(b, i, j)) > 1e-10) {
            test_passed = false;
            std::cout << "Batch " << b << " mismatch at (" << i << ", " << j << "): "
                      << C_host(b, i, j) << " vs " << C_orig_host(b, i, j) << '\n';
            break;
          }
        }
        if (!test_passed) break;
      }
      if (!test_passed) break;
    }

    if (test_passed) {
      std::cout << "Batched ApplyQ test: PASSED" << '\n';
    } else {
      std::cout << "Batched ApplyQ test: FAILED" << '\n';
    }
  }
  Kokkos::finalize();
  return 0;
}