-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[MLIR][OpenMP] Lowering nontemporal clause to LLVM IR for SIMD directive #109085
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-openmp @llvm/pr-subscribers-mlir-openmp Author: None (harishch4) Changes: This patch adds nontemporal metadata to loads and stores of nontemporal list items. Full diff: https://github.com/llvm/llvm-project/pull/109085.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 4be0159fb1dd9f..83b24136d58a80 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1187,7 +1187,8 @@ class OpenMPIRBuilder {
void applySimd(CanonicalLoopInfo *Loop,
MapVector<Value *, Value *> AlignedVars, Value *IfCond,
omp::OrderKind Order, ConstantInt *Simdlen,
- ConstantInt *Safelen);
+ ConstantInt *Safelen,
+ SmallVector<Value *> NontempralVars = {});
/// Generator for '#omp flush'
///
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 515b74cbb75883..6a32f3aa7703ac 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5183,10 +5183,31 @@ OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
return 0;
}
+/// Attach \p Nontemporal as nontemporal metadata to every load/store in \p Block whose
+/// pointer operand (after looking through at most one GEP) is in \p NontemporalVars.
+static void addNonTemporalMetadata(BasicBlock *Block, MDNode *Nontemporal,
+ SmallVector<Value *> NontemporalVars) { // NOTE(review): taken by value, so the list is copied on each call
+ for (Instruction &I : *Block) {
+ llvm::Value *mem_ptr = nullptr; // stays null for anything that is not a load or a store
+ if (llvm::LoadInst *li = dyn_cast<llvm::LoadInst>(&I))
+ mem_ptr = li->getPointerOperand();
+ else if (llvm::StoreInst *si = dyn_cast<llvm::StoreInst>(&I))
+ mem_ptr = si->getPointerOperand();
+ if (mem_ptr) {
+ if (llvm::GetElementPtrInst *gep = // only a single GEP level is stripped; chained GEPs are not followed
+ dyn_cast<llvm::GetElementPtrInst>(mem_ptr))
+ mem_ptr = gep->getPointerOperand();
+ if (is_contained(NontemporalVars, mem_ptr)) // match is by Value pointer identity
+ I.setMetadata(LLVMContext::MD_nontemporal, Nontemporal);
+ }
+ }
+}
+
void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
MapVector<Value *, Value *> AlignedVars,
Value *IfCond, OrderKind Order,
- ConstantInt *Simdlen, ConstantInt *Safelen) {
+ ConstantInt *Simdlen, ConstantInt *Safelen,
+ SmallVector<Value *> NontemporalVars) {
LLVMContext &Ctx = Builder.getContext();
Function *F = CanonicalLoop->getFunction();
@@ -5283,6 +5304,12 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
}
addLoopMetadata(CanonicalLoop, LoopMDList);
+ // Set nontemporal metadata to load and stores of nontemporal values
+ if (NontemporalVars.size()) {
+ MDNode *NontemporalNode = MDNode::getDistinct(Ctx, {});
+ for (BasicBlock *BB : Reachable)
+ addNonTemporalMetadata(BB, NontemporalNode, NontemporalVars);
+ }
}
/// Create the TargetMachine object to query the backend for optimization
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 0cba8d80681f13..1a597a3608274b 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1867,11 +1867,19 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
+
+ llvm::SmallVector<llvm::Value *> nontemporalVars;
+ mlir::OperandRange nontemporals = simdOp.getNontemporalVars();
+ for (mlir::Value nontemporal : nontemporals) {
+ llvm::Value *nt = moduleTranslation.lookupValue(nontemporal);
+ nontemporalVars.push_back(nt);
+ }
+
ompBuilder->applySimd(loopInfo, alignedVars,
simdOp.getIfExpr()
? moduleTranslation.lookupValue(simdOp.getIfExpr())
: nullptr,
- order, simdlen, safelen);
+ order, simdlen, safelen, nontemporalVars);
builder.restoreIP(afterIP);
return success();
diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
index 95e12e5bc4e742..34e42ed037cc66 100644
--- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir
+++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir
@@ -872,6 +872,28 @@ llvm.func @simd_order() {
// CHECK-NEXT: llvm.loop.vectorize.width{{.*}}i64 2
// -----
+// CHECK-LABEL: @simd_nontemporal
+llvm.func @simd_nontemporal() {
+ %0 = llvm.mlir.constant(10 : i64) : i64 // loop upper bound
+ %1 = llvm.mlir.constant(1 : i64) : i64 // loop lower bound, step, and alloca element count
+ %2 = llvm.alloca %1 x i64 : (i64) -> !llvm.ptr // destination of the store in the loop body
+ %3 = llvm.alloca %1 x i64 : (i64) -> !llvm.ptr // source of the load in the loop body
+ //CHECK: %[[A_ADDR:.*]] = alloca i64, i64 1, align 8
+ //CHECK: %[[B_ADDR:.*]] = alloca i64, i64 1, align 8
+ //CHECK: %[[B:.*]] = load i64, ptr %[[B_ADDR]], align 4, !nontemporal !1, !llvm.access.group !2
+ //CHECK: store i64 %[[B]], ptr %[[A_ADDR]], align 4, !nontemporal !1, !llvm.access.group !2
+ omp.simd nontemporal(%2, %3 : !llvm.ptr, !llvm.ptr) { // both allocas are listed as nontemporal
+ omp.loop_nest (%arg0) : i64 = (%1) to (%0) inclusive step (%1) {
+ %4 = llvm.load %3 : !llvm.ptr -> i64 // direct load from a listed pointer (no GEP in between)
+ llvm.store %4, %2 : i64, !llvm.ptr // direct store to a listed pointer
+ omp.yield
+ }
+ omp.terminator
+ }
+ llvm.return
+}
+// -----
+
llvm.func @body(i64)
llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () {
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about a flang box (runtime type descriptor)? In LLVM IR this would look like a pointer to a struct whose first element is a pointer to an array. So I think this would go through two (or more*) GEP instructions to reach the array.
You would need to take care over whether it is the box (metadata) or the array (data) that is marked as nontemporal.
Perhaps this is handled in flang lowering?
(*) We currently have cases with bad codegen leading to a pointer to a pointer to the box.
Closing this PR as it is being tracked in #118751 |
This patch adds nontemporal metadata to loads and stores of nontemporal list items.