-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Clang][OpenMP] Fix runtime problem when explicit map both pointer and pointee #92210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
For ponter int *p for following map, test currently crash. map(p, p[:100]) or map(p, p[1]) Currly IR looks like // &p, &p, sizeof(int), TARGET_PARAM | TO | FROM // &p, p[0], 100sizeof(float) TO | FROM Worrking IR is // map(p, p[0:100]) to map(p[0:100]) // &p, &p[0], 100*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ The change is add new argument AreBothBasePtrAndPteeMapped in generateInfoForComponentList Use that to skip map for map(p), when processing map(p[:100]) generate map with right flag.
@llvm/pr-subscribers-offload @llvm/pr-subscribers-clang Author: None (jyu2-git) ChangesFor ponter int *p for following map, test currently crash. map(p, p[:100]) or map(p, p[1]) Currly IR looks like Worrking IR is The change is add new argument AreBothBasePtrAndPteeMapped in generateInfoForComponentList Use that to skip map for map(p), when processing map(p[:100]) generate map with right flag. Full diff: https://github.com/llvm/llvm-project/pull/92210.diff 3 Files Affected:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e39c7c58d2780..f56af318ff6ae 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6830,7 +6830,8 @@ class MappableExprsHandler {
const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
- OverlappedElements = std::nullopt) const {
+ OverlappedElements = std::nullopt,
+ bool AreBothBasePtrAndPteeMapped = false) const {
// The following summarizes what has to be generated for each map and the
// types below. The generated information is expressed in this order:
// base pointer, section pointer, size, flags
@@ -7006,6 +7007,10 @@ class MappableExprsHandler {
// &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
// (*) the struct this entry pertains to is the 4th element in the list
// of arguments, hence MEMBER_OF(4)
+ //
+ // map(p, p[:100])
+ // ===> map(p[:100])
+ // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
// Track if the map information being generated is the first for a capture.
bool IsCaptureFirstInfo = IsFirstComponentList;
@@ -7029,6 +7034,8 @@ class MappableExprsHandler {
const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
+ if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
+ return;
if (isa<MemberExpr>(AssocExpr)) {
// The base is the 'this' pointer. The content of the pointer is going
// to be the base of the field being mapped.
@@ -7071,8 +7078,9 @@ class MappableExprsHandler {
// can be associated with the combined storage if shared memory mode is
// active or the base declaration is not global variable.
const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
- if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
- !VD || VD->hasLocalStorage())
+ if (!AreBothBasePtrAndPteeMapped &&
+ (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
+ !VD || VD->hasLocalStorage()))
BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
else
FirstPointerInComplexData = true;
@@ -7394,11 +7402,13 @@ class MappableExprsHandler {
// same expression except for the first one. We also need to signal
// this map is the first one that relates with the current capture
// (there is a set of entries for each capture).
- OpenMPOffloadMappingFlags Flags = getMapTypeBits(
- MapType, MapModifiers, MotionModifiers, IsImplicit,
- !IsExpressionFirstInfo || RequiresReference ||
- FirstPointerInComplexData || IsMemberReference,
- IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
+ OpenMPOffloadMappingFlags Flags =
+ getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
+ !IsExpressionFirstInfo || RequiresReference ||
+ FirstPointerInComplexData || IsMemberReference,
+ AreBothBasePtrAndPteeMapped ||
+ (IsCaptureFirstInfo && !RequiresReference),
+ IsNonContiguous);
if (!IsExpressionFirstInfo || IsMemberReference) {
// If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
@@ -8492,6 +8502,8 @@ class MappableExprsHandler {
assert(CurDir.is<const OMPExecutableDirective *>() &&
"Expect a executable directive");
const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
+ bool HasMapBasePtr = false;
+ bool HasMapArraySec = false;
for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
const auto *EI = C->getVarRefs().begin();
for (const auto L : C->decl_component_lists(VD)) {
@@ -8503,6 +8515,11 @@ class MappableExprsHandler {
assert(VDecl == VD && "We got information for the wrong declaration??");
assert(!Components.empty() &&
"Not expecting declaration with no component lists.");
+ if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
+ HasMapBasePtr = true;
+ if (VD && E && VD->getType()->isAnyPointerType() &&
+ (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
+ HasMapArraySec = true;
DeclComponentLists.emplace_back(Components, C->getMapType(),
C->getMapTypeModifiers(),
C->isImplicit(), Mapper, E);
@@ -8685,7 +8702,9 @@ class MappableExprsHandler {
MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
- /*ForDeviceAddr=*/false, VD, VarRef);
+ /*ForDeviceAddr=*/false, VD, VarRef,
+ /*OverlappedElements*/ std::nullopt,
+ HasMapBasePtr && HasMapArraySec);
IsFirstComponentList = false;
}
}
diff --git a/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
new file mode 100644
index 0000000000000..e2c27f37f5b9d
--- /dev/null
+++ b/clang/test/OpenMP/target_map_both_pointer_pointee_codegen.cpp
@@ -0,0 +1,150 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+extern void *malloc (int __size) throw () __attribute__ ((__malloc__));
+
+void foo() {
+ int *ptr = (int *) malloc(3 * sizeof(int));
+
+ #pragma omp target map(ptr, ptr[0:2])
+ {
+ ptr[1] = 6;
+ }
+ #pragma omp target map(ptr, ptr[2])
+ {
+ ptr[2] = 8;
+ }
+}
+#endif
+// CHECK-LABEL: define {{[^@]+}}@_Z3foov
+// CHECK-SAME: () #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8
+// CHECK-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call noalias noundef ptr @_Z6malloci(i32 noundef signext 12) #[[ATTR3:[0-9]+]]
+// CHECK-NEXT: store ptr [[CALL]], ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 0
+// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP2]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX]], ptr [[TMP3]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP4]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP7]], align 4
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP8]], align 4
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP6]], ptr [[TMP10]], align 8
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes, ptr [[TMP11]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes, ptr [[TMP12]], align 8
+// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP14]], align 8
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP16]], align 8
+// CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP17]], align 4
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP18]], align 4
+// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP19]], align 4
+// CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15.region_id, ptr [[KERNEL_ARGS]])
+// CHECK-NEXT: [[TMP21:%.*]] = icmp ne i32 [[TMP20]], 0
+// CHECK-NEXT: br i1 [[TMP21]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK: omp_offload.failed:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15(ptr [[TMP0]]) #[[ATTR3]]
+// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK: omp_offload.cont:
+// CHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[PTR]], align 8
+// CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP23]], i64 2
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[PTR]], ptr [[TMP24]], align 8
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: store ptr [[ARRAYIDX1]], ptr [[TMP25]], align 8
+// CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
+// CHECK-NEXT: store ptr null, ptr [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr [[TMP29]], align 4
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr [[TMP30]], align 4
+// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK-NEXT: store ptr [[TMP27]], ptr [[TMP31]], align 8
+// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK-NEXT: store ptr [[TMP28]], ptr [[TMP32]], align 8
+// CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK-NEXT: store ptr @.offload_sizes.1, ptr [[TMP33]], align 8
+// CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP34]], align 8
+// CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr [[TMP35]], align 8
+// CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr [[TMP36]], align 8
+// CHECK-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr [[TMP37]], align 8
+// CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr [[TMP38]], align 8
+// CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP39]], align 4
+// CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP40]], align 4
+// CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr [[TMP41]], align 4
+// CHECK-NEXT: [[TMP42:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19.region_id, ptr [[KERNEL_ARGS5]])
+// CHECK-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0
+// CHECK-NEXT: br i1 [[TMP43]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK: omp_offload.failed6:
+// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19(ptr [[TMP22]]) #[[ATTR3]]
+// CHECK-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK: omp_offload.cont7:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l15
+// CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 1
+// CHECK-NEXT: store i32 6, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l19
+// CHECK-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR2]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 2
+// CHECK-NEXT: store i32 8, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT: ret void
+//
diff --git a/offload/test/mapping/map_both_pointer_pointee.c b/offload/test/mapping/map_both_pointer_pointee.c
new file mode 100644
index 0000000000000..c23d218b90429
--- /dev/null
+++ b/offload/test/mapping/map_both_pointer_pointee.c
@@ -0,0 +1,46 @@
+// RUN: %libomptarget-compilexx-run-and-check-aarch64-unknown-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-powerpc64le-ibm-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-x86_64-pc-linux-gnu
+// RUN: %libomptarget-compilexx-run-and-check-nvptx64-nvidia-cuda
+
+// REQUIRES: unified_shared_memory
+// UNSUPPORTED: amdgcn-amd-amdhsa
+
+#pragma omp declare target
+int *ptr1;
+#pragma omp end declare target
+
+#include <stdio.h>
+#include <stdlib.h>
+int main() {
+ ptr1 = (int *)malloc(sizeof(int) * 100);
+ int *ptr2;
+ ptr2 = (int *)malloc(sizeof(int) * 100);
+#pragma omp target map(ptr1, ptr1[ : 100])
+ {
+ ptr1[1] = 6;
+ }
+ // CHECK: 6
+ printf(" %d \n", ptr1[1]);
+#pragma omp target data map(ptr1[ : 5])
+ {
+#pragma omp target map(ptr1[2], ptr1, ptr1[3]) map(ptr2, ptr2[2])
+ {
+ ptr1[2] = 7;
+ ptr1[3] = 9;
+ ptr2[2] = 7;
+ }
+ }
+ // CHECK: 7 7 9
+ printf(" %d %d %d \n", ptr2[2], ptr1[2], ptr1[3]);
+ free(ptr1);
+#pragma omp target map(ptr2, ptr2[ : 100])
+ {
+ ptr2[1] = 6;
+ }
+ // CHECK: 6
+ printf(" %d \n", ptr2[1]);
+ free(ptr2);
+ return 0;
+}
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Fix another runtime problem when explicit map both pointer and pointee in target data region. In llvm#92210, problem is only addressed in target region, but missing for target data region. The change just passing AreBothBasePtrAndPteeMapped in generateInfoForComponentList when processing target data.
Fix another runtime problem when explicit map both pointer and pointee in target data region. In #92210, problem is only addressed in target region, but missing for target data region. The change just passing AreBothBasePtrAndPteeMapped in generateInfoForComponentList when processing target data. --------- Co-authored-by: Alexey Bataev <[email protected]>
Fix another runtime problem when explicit map both pointer and pointee in target data region. In llvm#92210, problem is only addressed in target region, but missing for target data region. The change just passing AreBothBasePtrAndPteeMapped in generateInfoForComponentList when processing target data. --------- Co-authored-by: Alexey Bataev <[email protected]>
For ponter int *p for following map, test currently crash.
map(p, p[:100]) or map(p, p[1])
Currly IR looks like
// &p, &p, sizeof(int), TARGET_PARAM | TO | FROM
// &p, p[0], 100sizeof(float) TO | FROM
Worrking IR is
// map(p, p[0:100]) to map(p[0:100])
// &p, &p[0], 100*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
The change is add new argument AreBothBasePtrAndPteeMapped in generateInfoForComponentList
Use that to skip map for map(p), when processing map(p[:100]) generate map with right flag.