-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NFC][OpenMP][MLIR] Add test for lowering parallel workshare GPU loop #76144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NFC][OpenMP][MLIR] Add test for lowering parallel workshare GPU loop #76144
Conversation
This test checks if MLIR code is lowered according to schema presented below: func1(){ call __kmpc_parallel_51(..., func2, ...) } func2() { call __kmpc_for_static_loop_4u(..., func3, ...) } func3() { //loop body }
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-llvm Author: Dominik Adamski (DominikAdamski) Changes: This test checks if MLIR code is lowered according to the schema presented below: func1() { call __kmpc_parallel_51(..., func2, ...) } func2() { call __kmpc_for_static_loop_4u(..., func3, ...) } func3() { // loop body } Full diff: https://github.com/llvm/llvm-project/pull/76144.diff — 1 file affected
diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
new file mode 100644
index 00000000000000..43d0934d3a931e
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-parallel-wsloop.mlir
@@ -0,0 +1,36 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// The aim of the test is to check the GPU LLVM IR codegen
+// for nested omp do loop inside omp target region
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
+  llvm.func @target_parallel_wsloop(%arg0: !llvm.ptr ){
+    omp.parallel {
+      %loop_ub = llvm.mlir.constant(9 : i32) : i32
+      %loop_lb = llvm.mlir.constant(0 : i32) : i32
+      %loop_step = llvm.mlir.constant(1 : i32) : i32
+      omp.wsloop for  (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) {
+        %gep = llvm.getelementptr %arg0[0, %loop_cnt] : (!llvm.ptr, i32) -> !llvm.ptr, !llvm.array<10 x i32>
+        llvm.store %loop_cnt, %gep : i32, !llvm.ptr
+        omp.yield
+      }
+     omp.terminator
+    }
+
+    llvm.return
+  }
+
+}
+// CHECK: call void @__kmpc_parallel_51(ptr addrspacecast
+// CHECK-SAME: (ptr addrspace(1) @[[GLOB:[0-9]+]] to ptr),
+// CHECK-SAME: i32 %[[THREAD_NUM:.*]], i32 1, i32 -1, i32 -1,
+// CHECK-SAME: ptr @[[PARALLEL_FUNC:.*]], ptr null, ptr %[[PARALLEL_ARGS:.*]], i64 1)
+
+// CHECK: define internal void @[[PARALLEL_FUNC]]
+// CHECK-SAME: (ptr noalias noundef %[[TID_ADDR:.*]], ptr noalias noundef %[[ZERO_ADDR:.*]],
+// CHECK-SAME: ptr %[[ARG_PTR:.*]])
+// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB]] to ptr),
+// CHECK-SAME: ptr @[[LOOP_BODY_FUNC:.*]], ptr %[[LOOP_BODY_FUNC_ARG:.*]], i32 10,
+// CHECK-SAME: i32 %[[THREAD_NUM:.*]], i32 0)
+
+// CHECK: define internal void @[[LOOP_BODY_FUNC]](i32 %[[CNT:.*]], ptr %[[LOOP_BODY_ARG_PTR:.*]]) {
|
Could you share the command-line flags that have to be used with |
I guess it is something like the following.
|
The MLIR test case was reduced by hand. The initial code was similar to:
I am able to generate test-openmp-amdgcn-amd-amdhsa-gfx90a-llvmir.mlir file by command: Or: |
This test checks if MLIR code is lowered according to schema presented below:
func1() {
call __kmpc_parallel_51(..., func2, ...)
}
func2() {
call __kmpc_for_static_loop_4u(..., func3, ...)
}
func3() {
//loop body
}