-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang][OpenMP] Fix mapping of structs to device #75642
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Gheorghe-Teodor Bercea (doru1004) ChangesFix mapping of structs to device. The following example fails:
This is a rework of the previous attempt: #72410 Patch is 30.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75642.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 7f7e6f53066644..02f5d8fca7090c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6811,8 +6811,10 @@ class MappableExprsHandler {
OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
- MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
- bool IsFirstComponentList, bool IsImplicit,
+ MapCombinedInfoTy &CombinedInfo,
+ MapCombinedInfoTy &StructBaseCombinedInfo,
+ StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
+ bool IsImplicit, bool GenerateAllInfoForClauses,
const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
@@ -7098,6 +7100,25 @@ class MappableExprsHandler {
bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
bool IsPrevMemberReference = false;
+ // We need to check if we will be encountering any MEs. If we do not
+ // encounter any ME expression it means we will be mapping the whole struct.
+ // In that case we need to skip adding an entry for the struct to the
+ // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
+ // list only when generating all info for clauses.
+ bool IsMappingWholeStruct = true;
+ if (!GenerateAllInfoForClauses) {
+ IsMappingWholeStruct = false;
+ } else {
+ for (auto TempI = I; TempI != CE; ++TempI) {
+ const MemberExpr *PossibleME =
+ dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
+ if (PossibleME) {
+ IsMappingWholeStruct = false;
+ break;
+ }
+ }
+ }
+
for (; I != CE; ++I) {
// If the current component is member of a struct (parent struct) mark it.
if (!EncounteredME) {
@@ -7317,21 +7338,41 @@ class MappableExprsHandler {
break;
}
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
+ // Skip adding an entry in the CurInfo of this combined entry if the
+ // whole struct is currently being mapped. The struct needs to be added
+ // in the first position before any data internal to the struct is being
+ // mapped.
if (!IsMemberPointerOrAddr ||
(Next == CE && MapType != OMPC_MAP_unknown)) {
- CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
- CombinedInfo.BasePointers.push_back(BP.getPointer());
- CombinedInfo.DevicePtrDecls.push_back(nullptr);
- CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
- CombinedInfo.Pointers.push_back(LB.getPointer());
- CombinedInfo.Sizes.push_back(
- CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
- CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
- : 1);
+ if (!IsMappingWholeStruct) {
+ CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ CombinedInfo.BasePointers.push_back(BP.getPointer());
+ CombinedInfo.DevicePtrDecls.push_back(nullptr);
+ CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
+ CombinedInfo.Pointers.push_back(LB.getPointer());
+ CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ Size, CGF.Int64Ty, /*isSigned=*/true));
+ CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
+ : 1);
+ } else {
+ StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
+ StructBaseCombinedInfo.BasePointers.push_back(BP.getPointer());
+ StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
+ StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
+ StructBaseCombinedInfo.Pointers.push_back(LB.getPointer());
+ StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
+ Size, CGF.Int64Ty, /*isSigned=*/true));
+ StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
+ IsNonContiguous ? DimSize : 1);
+ }
// If Mapper is valid, the last component inherits the mapper.
bool HasMapper = Mapper && Next == CE;
- CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
+ if (!IsMappingWholeStruct)
+ CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
+ else
+ StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
+ : nullptr);
// We need to add a pointer flag for each map that comes from the
// same expression except for the first one. We also need to signal
@@ -7363,7 +7404,10 @@ class MappableExprsHandler {
}
}
- CombinedInfo.Types.push_back(Flags);
+ if (!IsMappingWholeStruct)
+ CombinedInfo.Types.push_back(Flags);
+ else
+ StructBaseCombinedInfo.Types.push_back(Flags);
}
// If we have encountered a member expression so far, keep track of the
@@ -7954,8 +7998,10 @@ class MappableExprsHandler {
for (const auto &Data : Info) {
StructRangeInfoTy PartialStruct;
- // Temporary generated information.
+ // Current struct information:
MapCombinedInfoTy CurInfo;
+ // Current struct base information:
+ MapCombinedInfoTy StructBaseCurInfo;
const Decl *D = Data.first;
const ValueDecl *VD = cast_or_null<ValueDecl>(D);
for (const auto &M : Data.second) {
@@ -7965,29 +8011,54 @@ class MappableExprsHandler {
// Remember the current base pointer index.
unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
+ unsigned StructBasePointersIdx =
+ StructBaseCurInfo.BasePointers.size();
CurInfo.NonContigInfo.IsNonContiguous =
L.Components.back().isNonContiguous();
generateInfoForComponentList(
L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
- CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
- L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
+ CurInfo, StructBaseCurInfo, PartialStruct,
+ /*IsFirstComponentList=*/false, L.IsImplicit,
+ /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
+ L.VarRef);
- // If this entry relates with a device pointer, set the relevant
+ // If this entry relates to a device pointer, set the relevant
// declaration and add the 'return pointer' flag.
if (L.ReturnDevicePointer) {
- assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
+ // Check whether a value was added to either CurInfo or
+ // StructBaseCurInfo and error if no value was added to either of
+ // them:
+ assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
+ StructBasePointersIdx <
+ StructBaseCurInfo.BasePointers.size()) &&
"Unexpected number of mapped base pointers.");
+ // Choose a base pointer index which is always valid:
const ValueDecl *RelevantVD =
L.Components.back().getAssociatedDeclaration();
assert(RelevantVD &&
"No relevant declaration related with device pointer??");
- CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
- CurInfo.DevicePointers[CurrentBasePointersIdx] =
- L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
- CurInfo.Types[CurrentBasePointersIdx] |=
- OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ // If StructBaseCurInfo has been updated this time then work on the
+ // first new entry instead of the last entry in CurInfo. Make sure
+ // that when multiple values are added to any of the lists, the
+ // first value added is being modified by the assignments below.
+ if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
+ StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
+ RelevantVD;
+ StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
+ L.ForDeviceAddr ? DeviceInfoTy::Address
+ : DeviceInfoTy::Pointer;
+ StructBaseCurInfo.Types[StructBasePointersIdx] |=
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ } else {
+ CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
+ CurInfo.DevicePointers[CurrentBasePointersIdx] =
+ L.ForDeviceAddr ? DeviceInfoTy::Address
+ : DeviceInfoTy::Pointer;
+ CurInfo.Types[CurrentBasePointersIdx] |=
+ OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
+ }
}
}
}
@@ -8034,17 +8105,24 @@ class MappableExprsHandler {
CurInfo.Mappers.push_back(nullptr);
}
}
+
+ // Unify entries in one list making sure the struct mapping precedes the
+ // individual fields:
+ MapCombinedInfoTy UnionCurInfo;
+ UnionCurInfo.append(StructBaseCurInfo);
+ UnionCurInfo.append(CurInfo);
+
// If there is an entry in PartialStruct it means we have a struct with
// individual members mapped. Emit an extra combined entry.
if (PartialStruct.Base.isValid()) {
- CurInfo.NonContigInfo.Dims.push_back(0);
- emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
+ UnionCurInfo.NonContigInfo.Dims.push_back(0);
+ // Emit a combined entry:
+ emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
/*IsMapThis*/ !VD, OMPBuilder, VD);
}
- // We need to append the results of this capture to what we already
- // have.
- CombinedInfo.append(CurInfo);
+ // We need to append the results of this capture to what we already have.
+ CombinedInfo.append(UnionCurInfo);
}
// Append data for use_device_ptr clauses.
CombinedInfo.append(UseDeviceDataCombinedInfo);
@@ -8554,6 +8632,7 @@ class MappableExprsHandler {
// Associated with a capture, because the mapping flags depend on it.
// Go through all of the elements with the overlapped elements.
bool IsFirstComponentList = true;
+ MapCombinedInfoTy StructBaseCombinedInfo;
for (const auto &Pair : OverlappedData) {
const MapData &L = *Pair.getFirst();
OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
@@ -8568,7 +8647,8 @@ class MappableExprsHandler {
OverlappedComponents = Pair.getSecond();
generateInfoForComponentList(
MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
- PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
+ StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
+ IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
/*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
IsFirstComponentList = false;
}
@@ -8584,10 +8664,11 @@ class MappableExprsHandler {
L;
auto It = OverlappedData.find(&L);
if (It == OverlappedData.end())
- generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
- Components, CombinedInfo, PartialStruct,
- IsFirstComponentList, IsImplicit, Mapper,
- /*ForDeviceAddr=*/false, VD, VarRef);
+ generateInfoForComponentList(
+ MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
+ StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
+ IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
+ /*ForDeviceAddr=*/false, VD, VarRef);
IsFirstComponentList = false;
}
}
diff --git a/clang/test/OpenMP/map_struct_ordering.cpp b/clang/test/OpenMP/map_struct_ordering.cpp
new file mode 100644
index 00000000000000..035b39b5b12ab4
--- /dev/null
+++ b/clang/test/OpenMP/map_struct_ordering.cpp
@@ -0,0 +1,172 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" --prefix-filecheck-ir-name _ --version 4
+
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -Wno-openmp-mapping | FileCheck %s --check-prefix=CHECK
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+struct Descriptor {
+ int *datum;
+ long int x;
+ int xi;
+ long int arr[1][30];
+};
+
+int map_struct() {
+ Descriptor dat = Descriptor();
+ dat.xi = 3;
+ dat.arr[0][0] = 1;
+
+ #pragma omp target enter data map(to: dat.datum[:10]) map(to: dat)
+
+ #pragma omp target
+ {
+ dat.xi = 4;
+ dat.datum[dat.arr[0][0]] = dat.xi;
+ }
+
+ #pragma omp target exit data map(from: dat)
+
+ return dat.xi;
+}
+
+#endif
+// CHECK-LABEL: define dso_local noundef signext i32 @_Z10map_structv(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DAT:%.*]] = alloca [[STRUCT_DESCRIPTOR:%.*]], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [3 x i64], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[DAT]], i8 0, i64 264, i1 false)
+// CHECK-NEXT: [[XI:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 2
+// CHECK-NEXT: store i32 3, ptr [[XI]], align 8
+// CHECK-NEXT: [[ARR:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 3
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1 x [30 x i64]], ptr [[ARR]], i64 0, i64 0
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [30 x i64], ptr [[ARRAYIDX]], i64 0, i64 0
+// CHECK-NEXT: store i64 1, ptr [[ARRAYIDX1]], align 8
+// CHECK-NEXT: [[DATUM:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0
+// CHECK-NEXT: [[DATUM2:%.*]] = getelementptr inbounds [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATUM2]], align 8
+// CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_DESCRIPTOR]], ptr [[DAT]], i32 1
+// CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
+// CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[DAT]] to i64
+// CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP2]], [[TMP3]]
+// CHECK-NEXT: [[TMP5:%.*]] = sdiv exact i64 [[TMP4]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes, i64 24, i1 false)
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP6]], align 8
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP7]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP8]], align 8
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP11]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[DATUM]], ptr [[TMP13]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[ARRAYIDX3]], ptr [[TMP14]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-NEXT: store ptr null, ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 3, ptr [[TMP16]], ptr [[TMP17]], ptr [[TMP18]], ptr @.offload_maptypes, ptr null, ptr null)
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP19]], align 8
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[DAT]], ptr [[TMP20]], align 8
+// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP21]], align 8
+// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT: store i32 2, ptr [[TMP24]], align 4
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP25]], align 4
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP22]], ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP23]], ptr [[TMP27]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.1, ptr [[TMP28]], align 8
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP29]], align 8
+// CHECK-NEXT: [[TMP30:%...
[truncated]
|
|
ae6cf04
to
e0e1f5e
Compare
e0e1f5e
to
3245448
Compare
@alexey-bataev I have reworked the previous patch with your advice in mind. The emitCombinedEntry function was not changed since eliminating the combined entry has many ramifications which would need to be handled in a separate patch. For now this fixes the immediate error in a way that allows us to later get rid of the combined entry if we want to. |
It appears that this patch made the buildbot unhappy (https://lab.llvm.org/buildbot/#/builders/193/builds/43948). Let me know if you need help with this. |
The newly added test
|
I disabled this test for NVIDIA for now: #75949 |
This fails for me on the host and the AMD GPU:
X86:
The location that is printed (datum[1]) is uninitialized. |
I see the same but forgot to say anything. |
… newly added test (#75807) Require presence of libomptarget-debug fixes llvm/llvm-project#75642
Fix mapping of structs to device.
The following example fails:
This is a rework of the previous attempt: #72410