Skip to content

Commit 8117009

Browse files
author
git apple-llvm automerger
committed
Merge commit '21199f9842df' from llvm.org/main into next
2 parents 2f4f437 + 21199f9 commit 8117009

File tree

3 files changed

+87
-3
lines changed

3 files changed

+87
-3
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2876,9 +2876,10 @@ OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
28762876
// We need to model the loop body region as the function f(cnt, loop_arg).
28772877
// That's why we replace the loop induction variable with the new counter
28782878
// which will be one of the loop body function's arguments.
2879-
for (auto Use = CLI->getIndVar()->user_begin();
2880-
Use != CLI->getIndVar()->user_end(); ++Use) {
2881-
if (Instruction *Inst = dyn_cast<Instruction>(*Use)) {
2879+
SmallVector<User *> Users(CLI->getIndVar()->user_begin(),
2880+
CLI->getIndVar()->user_end());
2881+
for (auto Use : Users) {
2882+
if (Instruction *Inst = dyn_cast<Instruction>(Use)) {
28822883
if (ParallelRegionBlockSet.count(Inst->getParent())) {
28832884
Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
28842885
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
// The aim of the test is to check the GPU LLVM IR codegen
4+
// for a nested omp do loop with a collapse clause inside an omp target region.
5+
6+
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true } {
7+
llvm.func @target_collapsed_wsloop(%arg0: !llvm.ptr) {
8+
%loop_ub = llvm.mlir.constant(99 : i32) : i32
9+
%loop_lb = llvm.mlir.constant(0 : i32) : i32
10+
%loop_step = llvm.mlir.constant(1 : index) : i32
11+
omp.wsloop for (%arg1, %arg2) : i32 = (%loop_lb, %loop_lb) to (%loop_ub, %loop_ub) inclusive step (%loop_step, %loop_step) {
12+
%1 = llvm.add %arg1, %arg2 : i32
13+
%2 = llvm.mul %arg2, %loop_ub overflow<nsw> : i32
14+
%3 = llvm.add %arg1, %2 :i32
15+
%4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i32) -> !llvm.ptr, i32
16+
llvm.store %1, %4 : i32, !llvm.ptr
17+
omp.yield
18+
}
19+
llvm.return
20+
}
21+
}
22+
23+
// CHECK: define void @[[FUNC_COLLAPSED_WSLOOP:.*]](ptr %[[ARG0:.*]])
24+
// CHECK: call void @__kmpc_for_static_loop_4u(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr),
25+
// CHECK-SAME: ptr @[[COLLAPSED_WSLOOP_BODY_FN:.*]], ptr %[[STRUCT_ARG:.*]], i32 10000,
26+
// CHECK-SAME: i32 %[[NUM_THREADS:.*]], i32 0)
27+
28+
// CHECK: define internal void @[[COLLAPSED_WSLOOP_BODY_FN]](i32 %[[LOOP_CNT:.*]], ptr %[[LOOP_BODY_ARG:.*]])
29+
// CHECK: %[[TMP0:.*]] = urem i32 %[[LOOP_CNT]], 100
30+
// CHECK: %[[TMP1:.*]] = udiv i32 %[[LOOP_CNT]], 100
31+
// CHECK: %[[TMP2:.*]] = mul i32 %[[TMP1]], 1
32+
// CHECK: %[[TMP3:.*]] = add i32 %[[TMP2]], 0
33+
// CHECK: %[[TMP4:.*]] = mul i32 %[[TMP0]], 1
34+
// CHECK: %[[TMP5:.*]] = add i32 %[[TMP4]], 0
35+
// CHECK: %[[TMP6:.*]] = add i32 %[[TMP3]], %[[TMP5]]
36+
// CHECK: %[[TMP7:.*]] = mul nsw i32 %[[TMP5]], 99
37+
// CHECK: %[[TMP8:.*]] = add i32 %[[TMP3]], %[[TMP7]]
38+
// CHECK: %[[TMP9:.*]] = getelementptr i32, ptr %[[ARRAY:.*]], i32 %[[TMP8]]
39+
// CHECK: store i32 %[[TMP6]], ptr %[[TMP9]], align 4
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
! Basic offloading test: a collapse(2) parallel do loop nest inside a target region, with the result checked on the host
2+
! REQUIRES: flang
3+
! UNSUPPORTED: nvptx64-nvidia-cuda-LTO
4+
! UNSUPPORTED: aarch64-unknown-linux-gnu
5+
! UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
6+
! UNSUPPORTED: x86_64-pc-linux-gnu
7+
! UNSUPPORTED: x86_64-pc-linux-gnu-LTO
8+
9+
! RUN: %libomptarget-compile-fortran-generic
10+
! RUN: env LIBOMPTARGET_INFO=16 %libomptarget-run-generic 2>&1 | %fcheck-generic
11+
program main
12+
use omp_lib
13+
implicit none
14+
integer :: i,j
15+
integer :: array(10,10), errors = 0
16+
do i = 1, 10
17+
do j = 1, 10
18+
array(j, i) = 0
19+
end do
20+
end do
21+
22+
!$omp target parallel do map(from:array) collapse(2)
23+
do i = 1, 10
24+
do j = 1, 10
25+
array( j, i) = i + j
26+
end do
27+
end do
28+
!$omp end target parallel do
29+
30+
do i = 1, 10
31+
do j = 1, 10
32+
if ( array( j, i) .ne. (i + j) ) then
33+
errors = errors + 1
34+
end if
35+
end do
36+
end do
37+
38+
print *,"number of errors: ", errors
39+
40+
end program main
41+
42+
! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}}
43+
! CHECK: number of errors: 0
44+

0 commit comments

Comments
 (0)