Skip to content

Codegen changes for strict modifier with grainsize/num_tasks of taskloop construct #117196

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4666,7 +4666,7 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
CGF.getContext().VoidPtrTy);
}
enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
llvm::Value *TaskArgs[] = {
llvm::SmallVector<llvm::Value *, 12> TaskArgs{
UpLoc,
ThreadID,
Result.NewTask,
Expand All @@ -4683,12 +4683,18 @@ void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
Data.Schedule.getPointer()
? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
/*isSigned=*/false)
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Result.TaskDupFn, CGF.VoidPtrTy)
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
: llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0)};
if (Data.HasModifier)
TaskArgs.push_back(llvm::ConstantInt::get(CGF.Int32Ty, 1));

TaskArgs.push_back(Result.TaskDupFn
? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
Result.TaskDupFn, CGF.VoidPtrTy)
: llvm::ConstantPointerNull::get(CGF.VoidPtrTy));
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_taskloop),
CGM.getModule(), Data.HasModifier
? OMPRTL___kmpc_taskloop_5
: OMPRTL___kmpc_taskloop),
TaskArgs);
}

Expand Down
1 change: 1 addition & 0 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ struct OMPTaskDataTy final {
bool IsReductionWithTaskMod = false;
bool IsWorksharingReduction = false;
bool HasNowaitClause = false;
bool HasModifier = false;
};

/// Class intended to support codegen of all kind of the reduction clauses.
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGStmtOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7831,10 +7831,14 @@ void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
// grainsize clause
Data.Schedule.setInt(/*IntVal=*/false);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
Data.HasModifier =
(Clause->getModifier() == OMPC_GRAINSIZE_strict) ? true : false;
} else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
// num_tasks clause
Data.Schedule.setInt(/*IntVal=*/true);
Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
Data.HasModifier =
(Clause->getModifier() == OMPC_NUMTASKS_strict) ? true : false;
}

auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
Expand Down
165 changes: 165 additions & 0 deletions clang/test/OpenMP/taskloop_strictmodifier_codegen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --prefix-filecheck-ir-name _ --version 5
// RUN: %clang_cc1 -fopenmp -O1 -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK
// expected-no-diagnostics
#ifndef HEADER
#define HEADER

// Test driver: exercises `master taskloop` with and without the `strict`
// modifier on grainsize/num_tasks. The CHECK lines for @main further down
// verify which runtime entry point each loop is lowered to.
int main(int argc, char **argv) {
// Loop 1: taskloop nested inside an explicit task; it has a priority clause
// but no grainsize/num_tasks clause.
#pragma omp task
#pragma omp master taskloop priority(argc)
for (int i = 0; i < 10; ++i)
;
// Loop 2: grainsize with the strict modifier. The CHECK lines expect a call
// to __kmpc_taskloop_5 whose trailing arguments are
// `i32 1, i64 <zext of argc>, i32 1, ptr null`, and (matching `nogroup`) no
// __kmpc_taskgroup/__kmpc_end_taskgroup pair around it.
#pragma omp master taskloop nogroup grainsize(strict:argc)
for (int i = 0; i < 10; ++i)
;
int i;
// Loop 3: num_tasks with the strict modifier on a collapsed two-level nest.
// The CHECK lines expect __kmpc_taskloop_5 with trailing arguments
// `i32 2, i64 4, i32 1, ptr null`, wrapped in a taskgroup.
#pragma omp master taskloop if(argc) shared(argc, argv) collapse(2) num_tasks(strict: 4)
for (i = 0; i < argc; ++i)
for (int j = argc; j < argv[argc][argc]; ++j)
;
// Loop 4: no grainsize/num_tasks clause at all. The CHECK lines expect the
// original __kmpc_taskloop entry point with `i32 0, i64 0, ptr null`, i.e.
// without the extra i32 argument that the strict variants carry.
#pragma omp master taskloop
for (int i = 0; i < 10; ++i) {
#pragma omp cancel taskgroup
#pragma omp cancellation point taskgroup
}
}
// Repeats the strict num_tasks check inside a constructor, where the clause
// argument is the data member `a`. The CHECK lines for @_ZN1SC2Ei expect a
// call to __kmpc_taskloop_5 with trailing arguments
// `i32 2, i64 <zext of the value loaded from this>, i32 1, ptr null`.
struct S {
int a;
S(int c) {
// `a` serves both as the num_tasks expression and as the loop variable.
#pragma omp master taskloop shared(c) num_tasks(strict:a)
for (a = 0; a < c; ++a)
;
}
} s(1); // global instance that invokes the constructor (presumably so it is emitted)

#endif

// CHECK-LABEL: define noundef i32 @main(
// CHECK-SAME: i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[ARGV_ADDR:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1:[0-9]+]])
// CHECK-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3:![0-9]+]]
// CHECK-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7:![0-9]+]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 1, ptr nonnull @.omp_task_entry..2)
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 40
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: store i32 [[TMP3]], ptr [[TMP2]], align 8, !tbaa [[TBAA9:![0-9]+]]
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]])
// CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP5]], 0
// CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
// CHECK: [[OMP_IF_THEN]]:
// CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TMP7:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..4)
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP8]], align 8, !tbaa [[TBAA13:![0-9]+]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 48
// CHECK-NEXT: store i64 9, ptr [[TMP9]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP10]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP7]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP11]], align 8
// CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT: tail call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP7]], i32 1, ptr nonnull [[TMP8]], ptr nonnull [[TMP9]], i64 1, i32 1, i32 1, i64 [[TMP12]], i32 1, ptr null) #[[ATTR1:[0-9]+]]
// CHECK-NEXT: tail call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: br label %[[OMP_IF_END]]
// CHECK: [[OMP_IF_END]]:
// CHECK-NEXT: [[TMP13:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT22:%.*]] = icmp eq i32 [[TMP13]], 0
// CHECK-NEXT: br i1 [[DOTNOT22]], label %[[OMP_IF_END17:.*]], label %[[OMP_IF_THEN2:.*]]
// CHECK: [[OMP_IF_THEN2]]:
// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TMP15:%.*]] = load ptr, ptr [[ARGV_ADDR]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP15]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i64 [[IDXPROM]]
// CHECK-NEXT: [[TMP17:%.*]] = load i8, ptr [[ARRAYIDX9]], align 1, !tbaa [[TBAA15:![0-9]+]]
// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP17]] to i32
// CHECK-NEXT: [[SUB12:%.*]] = sub i32 [[CONV]], [[TMP14]]
// CHECK-NEXT: [[CONV15:%.*]] = zext i32 [[SUB12]] to i64
// CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV15]], [[IDXPROM]]
// CHECK-NEXT: [[SUB16:%.*]] = add nsw i64 [[MUL]], -1
// CHECK-NEXT: [[TMP18:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..6)
// CHECK-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 8, !tbaa [[TBAA16:![0-9]+]]
// CHECK-NEXT: store ptr [[ARGC_ADDR]], ptr [[TMP19]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[AGG_CAPTURED3_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP19]], i64 8
// CHECK-NEXT: store ptr [[ARGV_ADDR]], ptr [[AGG_CAPTURED3_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP20]], 0
// CHECK-NEXT: [[TMP21:%.*]] = sext i1 [[TOBOOL]] to i32
// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP22]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 48
// CHECK-NEXT: store i64 [[SUB16]], ptr [[TMP23]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP24]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP18]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP25]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP18]], i32 [[TMP21]], ptr nonnull [[TMP22]], ptr nonnull [[TMP23]], i64 1, i32 1, i32 2, i64 4, i32 1, ptr null) #[[ATTR1]]
// CHECK-NEXT: call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: br label %[[OMP_IF_END17]]
// CHECK: [[OMP_IF_END17]]:
// CHECK-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT23:%.*]] = icmp eq i32 [[TMP26]], 0
// CHECK-NEXT: br i1 [[DOTNOT23]], label %[[OMP_IF_END21:.*]], label %[[OMP_IF_THEN18:.*]]
// CHECK: [[OMP_IF_THEN18]]:
// CHECK-NEXT: call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP27:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 1, ptr nonnull @.omp_task_entry..8)
// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP28]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 48
// CHECK-NEXT: store i64 9, ptr [[TMP29]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP30]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP27]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP31]], align 8
// CHECK-NEXT: call void @__kmpc_taskloop(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP27]], i32 1, ptr nonnull [[TMP28]], ptr nonnull [[TMP29]], i64 1, i32 1, i32 0, i64 0, ptr null)
// CHECK-NEXT: call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: br label %[[OMP_IF_END21]]
// CHECK: [[OMP_IF_END21]]:
// CHECK-NEXT: ret i32 0
//
//
//
// CHECK-LABEL: define linkonce_odr void @_ZN1SC2Ei(
// CHECK-SAME: ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[C:%.*]]) unnamed_addr #[[ATTR6:[0-9]+]] align 2 {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]])
// CHECK-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0
// CHECK-NEXT: br i1 [[DOTNOT]], label %[[OMP_IF_END:.*]], label %[[OMP_IF_THEN:.*]]
// CHECK: [[OMP_IF_THEN]]:
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[THIS]], align 4, !tbaa [[TBAA27:![0-9]+]]
// CHECK-NEXT: tail call void @__kmpc_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[C_ADDR]], align 4, !tbaa [[TBAA3]]
// CHECK-NEXT: [[SUB4:%.*]] = add nsw i32 [[TMP3]], -1
// CHECK-NEXT: [[TMP4:%.*]] = tail call ptr @__kmpc_omp_task_alloc(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i64 80, i64 16, ptr nonnull @.omp_task_entry..10)
// CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8, !tbaa [[TBAA16]]
// CHECK-NEXT: store ptr [[THIS]], ptr [[TMP5]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[AGG_CAPTURED_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i64 8
// CHECK-NEXT: store ptr [[C_ADDR]], ptr [[AGG_CAPTURED_SROA_2_0__SROA_IDX]], align 8, !tbaa [[TBAA7]]
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 40
// CHECK-NEXT: store i64 0, ptr [[TMP6]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 48
// CHECK-NEXT: [[CONV:%.*]] = sext i32 [[SUB4]] to i64
// CHECK-NEXT: store i64 [[CONV]], ptr [[TMP7]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 56
// CHECK-NEXT: store i64 1, ptr [[TMP8]], align 8, !tbaa [[TBAA13]]
// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP4]], i64 72
// CHECK-NEXT: store i64 0, ptr [[TMP9]], align 8
// CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP2]] to i64
// CHECK-NEXT: call void @__kmpc_taskloop_5(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr nonnull [[TMP4]], i32 1, ptr nonnull [[TMP6]], ptr nonnull [[TMP7]], i64 1, i32 1, i32 2, i64 [[TMP10]], i32 1, ptr null) #[[ATTR1]]
// CHECK-NEXT: call void @__kmpc_end_taskgroup(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: call void @__kmpc_end_master(ptr nonnull @[[GLOB1]], i32 [[TMP0]])
// CHECK-NEXT: br label %[[OMP_IF_END]]
// CHECK: [[OMP_IF_END]]:
// CHECK-NEXT: ret void

3 changes: 3 additions & 0 deletions llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ __OMP_RTL(__kmpc_omp_task_with_deps, false, Int32, IdentPtr, Int32,
__OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
/* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
/* Int */ Int32, Int64, VoidPtr)
// Same parameter list as the __kmpc_taskloop entry above, with one additional
// Int32 parameter inserted immediately before the trailing VoidPtr.
__OMP_RTL(__kmpc_taskloop_5, false, Void, IdentPtr, /* Int */ Int32, VoidPtr,
/* Int */ Int32, Int64Ptr, Int64Ptr, Int64, /* Int */ Int32,
/* Int */ Int32, Int64, Int32, VoidPtr)
__OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr,
IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64)
__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr,
Expand Down