Skip to content

Commit d346c82

Browse files
authored
[OpenMP] Associate the KernelEnvironment with the GenericKernelTy (#70383)
By associating the kernel environment with the generic kernel we can access middle-end information easily, including the launch bounds ranges that are acceptable. By constraining the number of threads accordingly, we now obey the user-provided bounds that were passed via attributes.
1 parent d8f5a18 commit d346c82

File tree

10 files changed

+74
-105
lines changed

10 files changed

+74
-105
lines changed

clang/test/OpenMP/bug57757.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,24 +32,23 @@ void foo() {
3232
// CHECK-NEXT: entry:
3333
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP1]], i64 0, i32 2
3434
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]])
35-
// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]])
36-
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA18:![0-9]+]], !alias.scope !13, !noalias !16
35+
// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !tbaa [[TBAA16:![0-9]+]], !alias.scope !13, !noalias !17
3736
// CHECK-NEXT: switch i32 [[TMP3]], label [[DOTOMP_OUTLINED__EXIT:%.*]] [
3837
// CHECK-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]]
3938
// CHECK-NEXT: i32 1, label [[DOTUNTIED_NEXT__I:%.*]]
4039
// CHECK-NEXT: ]
4140
// CHECK: .untied.jmp..i:
42-
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA18]], !alias.scope !13, !noalias !16
43-
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias !19
41+
// CHECK-NEXT: store i32 1, ptr [[TMP2]], align 4, !tbaa [[TBAA16]], !alias.scope !13, !noalias !17
42+
// CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @__kmpc_omp_task(ptr nonnull @[[GLOB1]], i32 [[TMP0]], ptr [[TMP1]]), !noalias !13
4443
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
4544
// CHECK: .untied.next..i:
4645
// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP1]], i64 0, i32 1
4746
// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i64 0, i32 1, i32 2
4847
// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP1]], i64 0, i32 1, i32 1
49-
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA20:![0-9]+]], !alias.scope !16, !noalias !13
50-
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA18]], !alias.scope !16, !noalias !13
51-
// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA21:![0-9]+]], !alias.scope !16, !noalias !13
52-
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias !19
48+
// CHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP5]], align 8, !tbaa [[TBAA19:![0-9]+]], !noalias !13
49+
// CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4, !tbaa [[TBAA16]], !noalias !13
50+
// CHECK-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP6]], align 4, !tbaa [[TBAA20:![0-9]+]], !noalias !13
51+
// CHECK-NEXT: tail call void [[TMP8]](i32 noundef [[TMP9]], float noundef [[TMP10]]) #[[ATTR2:[0-9]+]], !noalias !13
5352
// CHECK-NEXT: br label [[DOTOMP_OUTLINED__EXIT]]
5453
// CHECK: .omp_outlined..exit:
5554
// CHECK-NEXT: ret i32 0

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4093,8 +4093,8 @@ OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD,
40934093

40944094
Function *Kernel = Builder.GetInsertBlock()->getParent();
40954095

4096-
/// Manifest the launch configuration in the metadata matching the kernel
4097-
/// environment.
4096+
// Manifest the launch configuration in the metadata matching the kernel
4097+
// environment.
40984098
if (MinTeamsVal > 1 || MaxTeamsVal > 0)
40994099
writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal);
41004100

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
411411
/// generic kernel class.
412412
struct AMDGPUKernelTy : public GenericKernelTy {
413413
/// Create an AMDGPU kernel with a name and an execution mode.
414-
AMDGPUKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
415-
: GenericKernelTy(Name, ExecutionMode) {}
414+
AMDGPUKernelTy(const char *Name) : GenericKernelTy(Name) {}
416415

417416
/// Initialize the AMDGPU kernel.
418417
Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override {
@@ -1978,14 +1977,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
19781977

19791978
/// Allocate and construct an AMDGPU kernel.
19801979
Expected<GenericKernelTy &>
1981-
constructKernel(const __tgt_offload_entry &KernelEntry,
1982-
OMPTgtExecModeFlags ExecMode) override {
1980+
constructKernel(const __tgt_offload_entry &KernelEntry) override {
19831981
// Allocate and construct the AMDGPU kernel.
19841982
AMDGPUKernelTy *AMDGPUKernel = Plugin::get().allocate<AMDGPUKernelTy>();
19851983
if (!AMDGPUKernel)
19861984
return Plugin::error("Failed to allocate memory for AMDGPU kernel");
19871985

1988-
new (AMDGPUKernel) AMDGPUKernelTy(KernelEntry.name, ExecMode);
1986+
new (AMDGPUKernel) AMDGPUKernelTy(KernelEntry.name);
19891987

19901988
return *AMDGPUKernel;
19911989
}

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp

Lines changed: 27 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,33 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
339339

340340
ImagePtr = &Image;
341341

342-
PreferredNumThreads = GenericDevice.getDefaultNumThreads();
342+
// Retrieve kernel environment object for the kernel.
343+
GlobalTy KernelEnv(std::string(Name) + "_kernel_environment",
344+
sizeof(KernelEnvironment), &KernelEnvironment);
345+
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
346+
if (auto Err =
347+
GHandler.readGlobalFromImage(GenericDevice, *ImagePtr, KernelEnv)) {
348+
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
349+
DP("Failed to read kernel environment for '%s': %s\n"
350+
"Using default SPMD (2) execution mode\n",
351+
Name, ErrStr.data());
352+
KernelEnvironment.Configuration.ExecMode = OMP_TGT_EXEC_MODE_SPMD;
353+
KernelEnvironment.Configuration.MayUseNestedParallelism = /*Unknown=*/2;
354+
KernelEnvironment.Configuration.UseGenericStateMachine = /*Unknown=*/2;
355+
}
343356

344-
MaxNumThreads = GenericDevice.getThreadLimit();
357+
// Max = Config.Max > 0 ? min(Config.Max, Device.Max) : Device.Max;
358+
MaxNumThreads = KernelEnvironment.Configuration.MaxThreads > 0
359+
? std::min(KernelEnvironment.Configuration.MaxThreads,
360+
int32_t(GenericDevice.getThreadLimit()))
361+
: GenericDevice.getThreadLimit();
362+
363+
// Pref = Config.Pref > 0 ? max(Config.Pref, Device.Pref) : Device.Pref;
364+
PreferredNumThreads =
365+
KernelEnvironment.Configuration.MinThreads > 0
366+
? std::max(KernelEnvironment.Configuration.MinThreads,
367+
int32_t(GenericDevice.getDefaultNumThreads()))
368+
: GenericDevice.getDefaultNumThreads();
345369

346370
return initImpl(GenericDevice, Image);
347371
}
@@ -890,13 +914,8 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
890914
__tgt_offload_entry &DeviceEntry) {
891915
DeviceEntry = KernelEntry;
892916

893-
// Retrieve the execution mode.
894-
auto ExecModeOrErr = getExecutionModeForKernel(KernelEntry.name, Image);
895-
if (!ExecModeOrErr)
896-
return ExecModeOrErr.takeError();
897-
898917
// Create a kernel object.
899-
auto KernelOrErr = constructKernel(KernelEntry, *ExecModeOrErr);
918+
auto KernelOrErr = constructKernel(KernelEntry);
900919
if (!KernelOrErr)
901920
return KernelOrErr.takeError();
902921

@@ -914,45 +933,6 @@ Error GenericDeviceTy::registerKernelOffloadEntry(
914933
return Plugin::success();
915934
}
916935

917-
Expected<KernelEnvironmentTy>
918-
GenericDeviceTy::getKernelEnvironmentForKernel(StringRef Name,
919-
DeviceImageTy &Image) {
920-
// Create a metadata object for the kernel environment object.
921-
StaticGlobalTy<KernelEnvironmentTy> KernelEnv(Name.data(),
922-
"_kernel_environment");
923-
924-
// Retrieve kernel environment object for the kernel.
925-
GenericGlobalHandlerTy &GHandler = Plugin::get().getGlobalHandler();
926-
if (auto Err = GHandler.readGlobalFromImage(*this, Image, KernelEnv))
927-
return std::move(Err);
928-
929-
return KernelEnv.getValue();
930-
}
931-
932-
Expected<OMPTgtExecModeFlags>
933-
GenericDeviceTy::getExecutionModeForKernel(StringRef Name,
934-
DeviceImageTy &Image) {
935-
auto KernelEnvOrError = getKernelEnvironmentForKernel(Name, Image);
936-
if (!KernelEnvOrError) {
937-
[[maybe_unused]] std::string ErrStr =
938-
toString(KernelEnvOrError.takeError());
939-
DP("Failed to read kernel environment for '%s': %s\n"
940-
"Using default SPMD (2) execution mode\n",
941-
Name.data(), ErrStr.data());
942-
return OMP_TGT_EXEC_MODE_SPMD;
943-
}
944-
945-
auto &KernelEnv = *KernelEnvOrError;
946-
auto ExecMode = KernelEnv.Configuration.ExecMode;
947-
948-
// Check that the retrieved execution mode is valid.
949-
if (!GenericKernelTy::isValidExecutionMode(ExecMode))
950-
return Plugin::error("Invalid execution mode %d for '%s'", ExecMode,
951-
Name.data());
952-
953-
return ExecMode;
954-
}
955-
956936
Error PinnedAllocationMapTy::insertEntry(void *HstPtr, void *DevAccessiblePtr,
957937
size_t Size, bool ExternallyLocked) {
958938
// Insert the new entry into the map.

openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.h

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,8 @@ class DeviceImageTy {
255255
/// implement the necessary virtual function members.
256256
struct GenericKernelTy {
257257
/// Construct a kernel with a name and a execution mode.
258-
GenericKernelTy(const char *Name, OMPTgtExecModeFlags ExecutionMode)
259-
: Name(Name), ExecutionMode(ExecutionMode), PreferredNumThreads(0),
260-
MaxNumThreads(0) {}
258+
GenericKernelTy(const char *Name)
259+
: Name(Name), PreferredNumThreads(0), MaxNumThreads(0) {}
261260

262261
virtual ~GenericKernelTy() {}
263262

@@ -285,6 +284,11 @@ struct GenericKernelTy {
285284
return *ImagePtr;
286285
}
287286

287+
/// Return the kernel environment object for kernel \p Name.
288+
const KernelEnvironmentTy &getKernelEnvironmentForKernel() {
289+
return KernelEnvironment;
290+
}
291+
288292
/// Indicate whether an execution mode is valid.
289293
static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
290294
switch (ExecutionMode) {
@@ -299,7 +303,7 @@ struct GenericKernelTy {
299303
protected:
300304
/// Get the execution mode name of the kernel.
301305
const char *getExecutionModeName() const {
302-
switch (ExecutionMode) {
306+
switch (KernelEnvironment.Configuration.ExecMode) {
303307
case OMP_TGT_EXEC_MODE_SPMD:
304308
return "SPMD";
305309
case OMP_TGT_EXEC_MODE_GENERIC:
@@ -343,19 +347,20 @@ struct GenericKernelTy {
343347

344348
/// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
345349
bool isGenericSPMDMode() const {
346-
return ExecutionMode == OMP_TGT_EXEC_MODE_GENERIC_SPMD;
350+
return KernelEnvironment.Configuration.ExecMode ==
351+
OMP_TGT_EXEC_MODE_GENERIC_SPMD;
347352
}
348353
bool isGenericMode() const {
349-
return ExecutionMode == OMP_TGT_EXEC_MODE_GENERIC;
354+
return KernelEnvironment.Configuration.ExecMode ==
355+
OMP_TGT_EXEC_MODE_GENERIC;
356+
}
357+
bool isSPMDMode() const {
358+
return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_SPMD;
350359
}
351-
bool isSPMDMode() const { return ExecutionMode == OMP_TGT_EXEC_MODE_SPMD; }
352360

353361
/// The kernel name.
354362
const char *Name;
355363

356-
/// The execution flags of the kernel.
357-
OMPTgtExecModeFlags ExecutionMode;
358-
359364
/// The image that contains this kernel.
360365
DeviceImageTy *ImagePtr = nullptr;
361366

@@ -365,6 +370,9 @@ struct GenericKernelTy {
365370

366371
/// The maximum number of threads which the kernel could leverage.
367372
uint32_t MaxNumThreads;
373+
374+
/// The kernel environment, including execution flags.
375+
KernelEnvironmentTy KernelEnvironment;
368376
};
369377

370378
/// Class representing a map of host pinned allocations. We track these pinned
@@ -819,8 +827,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
819827

820828
/// Allocate and construct a kernel object.
821829
virtual Expected<GenericKernelTy &>
822-
constructKernel(const __tgt_offload_entry &KernelEntry,
823-
OMPTgtExecModeFlags ExecMode) = 0;
830+
constructKernel(const __tgt_offload_entry &KernelEntry) = 0;
824831

825832
/// Get and set the stack size and heap size for the device. If not used, the
826833
/// plugin can implement the setters as no-op and setting the output
@@ -864,10 +871,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
864871
UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);
865872

866873
protected:
867-
/// Return the execution mode used for kernel \p Name.
868-
virtual Expected<OMPTgtExecModeFlags>
869-
getExecutionModeForKernel(StringRef Name, DeviceImageTy &Image);
870-
871874
/// Environment variables defined by the LLVM OpenMP implementation
872875
/// regarding the initial number of streams and events.
873876
UInt32Envar OMPX_InitialNumStreams;
@@ -916,10 +919,6 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
916919
#endif
917920

918921
private:
919-
/// Return the kernel environment object for kernel \p Name.
920-
Expected<KernelEnvironmentTy>
921-
getKernelEnvironmentForKernel(StringRef Name, DeviceImageTy &Image);
922-
923922
DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
924923
DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
925924
};

openmp/libomptarget/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,7 @@ struct CUDADeviceImageTy : public DeviceImageTy {
7979
/// generic kernel class.
8080
struct CUDAKernelTy : public GenericKernelTy {
8181
/// Create a CUDA kernel with a name and an execution mode.
82-
CUDAKernelTy(const char *Name, OMPTgtExecModeFlags ExecMode)
83-
: GenericKernelTy(Name, ExecMode), Func(nullptr) {}
82+
CUDAKernelTy(const char *Name) : GenericKernelTy(Name), Func(nullptr) {}
8483

8584
/// Initialize the CUDA kernel.
8685
Error initImpl(GenericDeviceTy &GenericDevice,
@@ -356,14 +355,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
356355

357356
/// Allocate and construct a CUDA kernel.
358357
Expected<GenericKernelTy &>
359-
constructKernel(const __tgt_offload_entry &KernelEntry,
360-
OMPTgtExecModeFlags ExecMode) override {
358+
constructKernel(const __tgt_offload_entry &KernelEntry) override {
361359
// Allocate and construct the CUDA kernel.
362360
CUDAKernelTy *CUDAKernel = Plugin::get().allocate<CUDAKernelTy>();
363361
if (!CUDAKernel)
364362
return Plugin::error("Failed to allocate memory for CUDA kernel");
365363

366-
new (CUDAKernel) CUDAKernelTy(KernelEntry.name, ExecMode);
364+
new (CUDAKernel) CUDAKernelTy(KernelEntry.name);
367365

368366
return *CUDAKernel;
369367
}

openmp/libomptarget/plugins-nextgen/generic-elf-64bit/src/rtl.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
#include "llvm/ADT/SmallVector.h"
2727
#include "llvm/Frontend/OpenMP/OMPConstants.h"
28+
#include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
2829
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
2930
#include "llvm/Support/DynamicLibrary.h"
3031

@@ -51,8 +52,7 @@ using llvm::sys::DynamicLibrary;
5152
/// Class implementing kernel functionalities for GenELF64.
5253
struct GenELF64KernelTy : public GenericKernelTy {
5354
/// Construct the kernel with a name and an execution mode.
54-
GenELF64KernelTy(const char *Name, OMPTgtExecModeFlags ExecMode)
55-
: GenericKernelTy(Name, ExecMode), Func(nullptr) {}
55+
GenELF64KernelTy(const char *Name) : GenericKernelTy(Name), Func(nullptr) {}
5656

5757
/// Initialize the kernel.
5858
Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override {
@@ -71,6 +71,10 @@ struct GenELF64KernelTy : public GenericKernelTy {
7171
// Save the function pointer.
7272
Func = (void (*)())Global.getPtr();
7373

74+
KernelEnvironment.Configuration.ExecMode = OMP_TGT_EXEC_MODE_GENERIC;
75+
KernelEnvironment.Configuration.MayUseNestedParallelism = /* Unknown */ 2;
76+
KernelEnvironment.Configuration.UseGenericStateMachine = /* Unknown */ 2;
77+
7478
// Set the maximum number of threads to a single.
7579
MaxNumThreads = 1;
7680
return Plugin::success();
@@ -137,15 +141,14 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
137141

138142
/// Construct the kernel for a specific image on the device.
139143
Expected<GenericKernelTy &>
140-
constructKernel(const __tgt_offload_entry &KernelEntry,
141-
OMPTgtExecModeFlags ExecMode) override {
144+
constructKernel(const __tgt_offload_entry &KernelEntry) override {
142145
// Allocate and construct the kernel.
143146
GenELF64KernelTy *GenELF64Kernel =
144147
Plugin::get().allocate<GenELF64KernelTy>();
145148
if (!GenELF64Kernel)
146149
return Plugin::error("Failed to allocate memory for GenELF64 kernel");
147150

148-
new (GenELF64Kernel) GenELF64KernelTy(KernelEntry.name, ExecMode);
151+
new (GenELF64Kernel) GenELF64KernelTy(KernelEntry.name);
149152

150153
return *GenELF64Kernel;
151154
}
@@ -325,13 +328,6 @@ struct GenELF64DeviceTy : public GenericDeviceTy {
325328
}
326329
Error setDeviceHeapSize(uint64_t Value) override { return Plugin::success(); }
327330

328-
protected:
329-
/// Retrieve the execution mode for kernels. All kernels use the generic mode.
330-
Expected<OMPTgtExecModeFlags>
331-
getExecutionModeForKernel(StringRef Name, DeviceImageTy &Image) override {
332-
return OMP_TGT_EXEC_MODE_GENERIC;
333-
}
334-
335331
private:
336332
/// Grid values for Generic ELF64 plugins.
337333
static constexpr GV GenELF64GridValues = {

openmp/libomptarget/test/offloading/default_thread_limit.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ int main() {
4848
for (int i = 0; i < N; ++i) {
4949
optnone();
5050
}
51-
// FIXME: Use the attribute value to imply a thread_limit
52-
// DEFAULT: {{(128|256)}} (MaxFlatWorkGroupSize: 42
51+
// DEFAULT: 42 (MaxFlatWorkGroupSize: 42
5352
#pragma omp target ompx_attribute(__attribute__((amdgpu_flat_work_group_size(42, 42))))
5453
#pragma omp teams distribute parallel for
5554
for (int i = 0; i < N; ++i) {

openmp/libomptarget/test/offloading/thread_state_1.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ int main() {
2626
}
2727
}
2828
}
29-
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
30-
i_nt == 1) {
29+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt >= 1 && i_lvl == 2 &&
30+
i_tid == 0 && i_nt == 1) {
3131
// CHECK: Success
3232
printf("Success\n");
3333
return 0;

openmp/libomptarget/test/offloading/thread_state_2.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ int main() {
2828
}
2929
}
3030
}
31-
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt > 1 && i_lvl == 2 && i_tid == 0 &&
32-
i_nt == 1) {
31+
if (o_lvl == 1 && o_tid == o_nt - 1 && o_nt >= 1 && i_lvl == 2 &&
32+
i_tid == 0 && i_nt == 1) {
3333
// CHECK: Success
3434
printf("Success\n");
3535
return 0;

0 commit comments

Comments
 (0)