-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Attributor] Use known non-flat AS before getAssumedAddrSpace
#143221
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesIf the underlying object already has a non-flat address space, we simply use Patch is 38.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143221.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1e908b76d1814..cabd32fe0cf7d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,19 +12592,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
ChangeStatus updateImpl(Attributor &A) override {
uint32_t OldAddressSpace = AssumedAddressSpace;
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
auto CheckAddressSpace = [&](Value &Obj) {
- if (isa<UndefValue>(&Obj))
+ // Ignore undef and poison.
+ if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
return true;
- if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+
+ // If the object already has a non-flat address space, we simply take it.
+ unsigned ObjAS = Obj.getType()->getPointerAddressSpace();
+ if (ObjAS != FlatAS)
+ return takeAddressSpace(ObjAS);
+
+ // At this point, we know Obj is in the flat address space. For a final
+ // attempt, we want to use 'getAssumedAddrSpace', but first we must get
+ // the associated function, if possible.
+ Function *F = nullptr;
+ if (auto *Arg = dyn_cast<Argument>(&Obj))
+ F = Arg->getParent();
+ else if (auto *I = dyn_cast<Instruction>(&Obj))
+ F = I->getFunction();
+
+ // Use getAssumedAddrSpace if the associated function exists.
+ if (F) {
auto *TTI =
- A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
- *Arg->getParent());
- unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(*F);
+ unsigned AssumedAS = TTI->getAssumedAddrSpace(&Obj);
if (AssumedAS != ~0U)
return takeAddressSpace(AssumedAS);
}
- return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+
+ // Now we can't do anything else but to take the flat AS.
+ return takeAddressSpace(FlatAS);
};
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..78766b4e8eb08 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -276,3 +276,28 @@ define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val)
call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
ret void
}
+
+define amdgpu_kernel void @kernel_argument_with_known_as(ptr addrspace(1) %p1, ptr addrspace(3) %p3, i32 %val) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_argument_with_known_as(
+; CHECK-SAME: ptr addrspace(1) [[P1:%.*]], ptr addrspace(3) [[P3:%.*]], i32 [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[P1_CAST:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT: [[P3_CAST:%.*]] = addrspacecast ptr addrspace(3) [[P3]] to ptr
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[B]], ptr [[P1_CAST]], ptr [[P3_CAST]]
+; CHECK-NEXT: [[ATOMIC_ADD:%.*]] = atomicrmw add ptr [[P]], i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0:![0-9]+]], !amdgpu.no.fine.grained.memory [[META1:![0-9]+]], !amdgpu.no.remote.memory [[META1]]
+; CHECK-NEXT: ret void
+;
+ %p1.cast = addrspacecast ptr addrspace(1) %p1 to ptr
+ %p3.cast = addrspacecast ptr addrspace(3) %p3 to ptr
+ %b = icmp eq i32 %val, 0
+ %p = select i1 %b, ptr %p1.cast, ptr %p3.cast
+ %atomic.add = atomicrmw add ptr %p, i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1
+ ret void
+}
+
+!0 = !{i32 5, i32 6}
+!1 = !{}
+;.
+; CHECK: [[META0]] = !{i32 5, i32 6}
+; CHECK: [[META1]] = !{}
+;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 1679a27fdae8b..834c623d27695 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
-; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
+; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
; CHECK-NEXT: ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 29f2030c4d42b..9966c33f0c91b 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
-; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
; CHECK: loopbody:
-; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK: loopinc:
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-NEXT: br label [[LOOPBODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
-; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-DISABLED-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK-DISABLED: loopinc:
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
; CHECK-DISABLED: exit:
; CHECK-DISABLED-NEXT: ret void
@@ -268,7 +278,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK: [[META2]] = !{}
; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -279,7 +289,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK-DISABLED: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK-DISABLED: [[META2]] = !{}
; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 809fc39442f91..d057e5b233e87 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -71,28 +71,29 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK: user_code.entry:
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
@@ -108,19 +109,20 @@ def...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Shilei Tian (shiltian) ChangesIf the underlying object already has a non-flat address space, we simply use Patch is 38.77 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/143221.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1e908b76d1814..cabd32fe0cf7d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -12592,19 +12592,38 @@ struct AAAddressSpaceImpl : public AAAddressSpace {
ChangeStatus updateImpl(Attributor &A) override {
uint32_t OldAddressSpace = AssumedAddressSpace;
+ unsigned FlatAS = A.getInfoCache().getFlatAddressSpace().value();
auto CheckAddressSpace = [&](Value &Obj) {
- if (isa<UndefValue>(&Obj))
+ // Ignore undef and poison.
+ if (isa<UndefValue>(&Obj) || isa<PoisonValue>(&Obj))
return true;
- if (auto *Arg = dyn_cast<Argument>(&Obj)) {
+
+ // If the object already has a non-flat address space, we simply take it.
+ unsigned ObjAS = Obj.getType()->getPointerAddressSpace();
+ if (ObjAS != FlatAS)
+ return takeAddressSpace(ObjAS);
+
+ // At this point, we know Obj is in the flat address space. For a final
+ // attempt, we want to use 'getAssumedAddrSpace', but first we must get
+ // the associated function, if possible.
+ Function *F = nullptr;
+ if (auto *Arg = dyn_cast<Argument>(&Obj))
+ F = Arg->getParent();
+ else if (auto *I = dyn_cast<Instruction>(&Obj))
+ F = I->getFunction();
+
+ // Use getAssumedAddrSpace if the associated function exists.
+ if (F) {
auto *TTI =
- A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
- *Arg->getParent());
- unsigned AssumedAS = TTI->getAssumedAddrSpace(Arg);
+ A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(*F);
+ unsigned AssumedAS = TTI->getAssumedAddrSpace(&Obj);
if (AssumedAS != ~0U)
return takeAddressSpace(AssumedAS);
}
- return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+
+ // Now we can't do anything else but to take the flat AS.
+ return takeAddressSpace(FlatAS);
};
auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
diff --git a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
index cc2c80060231c..78766b4e8eb08 100644
--- a/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
+++ b/llvm/test/CodeGen/AMDGPU/aa-as-infer.ll
@@ -276,3 +276,28 @@ define void @kernel_argument_promotion_pattern_inter_procedure(ptr %p, i32 %val)
call void @use_argument_after_promotion(ptr %p.cast.1, i32 %val)
ret void
}
+
+define amdgpu_kernel void @kernel_argument_with_known_as(ptr addrspace(1) %p1, ptr addrspace(3) %p3, i32 %val) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_argument_with_known_as(
+; CHECK-SAME: ptr addrspace(1) [[P1:%.*]], ptr addrspace(3) [[P3:%.*]], i32 [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[P1_CAST:%.*]] = addrspacecast ptr addrspace(1) [[P1]] to ptr
+; CHECK-NEXT: [[P3_CAST:%.*]] = addrspacecast ptr addrspace(3) [[P3]] to ptr
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[VAL]], 0
+; CHECK-NEXT: [[P:%.*]] = select i1 [[B]], ptr [[P1_CAST]], ptr [[P3_CAST]]
+; CHECK-NEXT: [[ATOMIC_ADD:%.*]] = atomicrmw add ptr [[P]], i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace [[META0:![0-9]+]], !amdgpu.no.fine.grained.memory [[META1:![0-9]+]], !amdgpu.no.remote.memory [[META1]]
+; CHECK-NEXT: ret void
+;
+ %p1.cast = addrspacecast ptr addrspace(1) %p1 to ptr
+ %p3.cast = addrspacecast ptr addrspace(3) %p3 to ptr
+ %b = icmp eq i32 %val, 0
+ %p = select i1 %b, ptr %p1.cast, ptr %p3.cast
+ %atomic.add = atomicrmw add ptr %p, i32 1 syncscope("agent") seq_cst, align 4, !noalias.addrspace !0, !amdgpu.no.fine.grained.memory !1, !amdgpu.no.remote.memory !1
+ ret void
+}
+
+!0 = !{i32 5, i32 6}
+!1 = !{}
+;.
+; CHECK: [[META0]] = !{i32 5, i32 6}
+; CHECK: [[META1]] = !{}
+;.
diff --git a/llvm/test/Transforms/OpenMP/nested_parallelism.ll b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
index 1679a27fdae8b..834c623d27695 100644
--- a/llvm/test/Transforms/OpenMP/nested_parallelism.ll
+++ b/llvm/test/Transforms/OpenMP/nested_parallelism.ll
@@ -64,7 +64,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l13
; CHECK-NEXT: br label [[_Z3FOOI_INTERNALIZED_EXIT]]
; CHECK: _Z3fooi.internalized.exit:
; CHECK-NEXT: tail call void @__kmpc_barrier_simple_spmd(ptr nonnull @[[GLOB1]], i32 [[TMP2]]) #[[ATTR2]]
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i_shared to ptr), ptr addrspace(5) [[TMP4]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -109,7 +110,8 @@ define hidden void @_Z3fooi(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__, ptr nonnull @__omp_outlined___wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -141,7 +143,8 @@ define weak_odr protected ptx_kernel void @__omp_offloading_10302_bd7e0_main_l16
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(3) @i.i_shared, align 16
-; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr addrspacecast (ptr addrspace(3) @i.i_shared to ptr), ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
; CHECK-NEXT: call void @__kmpc_target_deinit()
@@ -175,7 +178,8 @@ define hidden void @_Z4foo1i(i32 noundef %i1) local_unnamed_addr #1 {
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[I1:%.*]], ptr [[I]], align 16
-; CHECK-NEXT: store ptr [[I]], ptr [[CAPTURED_VARS_ADDRS]], align 8
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I]], ptr addrspace(5) [[TMP1]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: ret void
@@ -202,7 +206,8 @@ define internal void @__omp_outlined__(ptr noalias nocapture readnone %.global_t
; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I:%.*]] = tail call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP0]], ptr [[I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I]], ptr [[CAPTURED_VARS_ADDRS_I]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I]], ptr addrspace(5) [[TMP2]], align 8
; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I]])
@@ -228,15 +233,17 @@ define internal void @__omp_outlined___wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: [[CAPTURED_VARS_ADDRS_I_I:%.*]] = alloca [1 x ptr], align 8
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
-; CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR2]]
; CHECK-NEXT: [[I_I_I:%.*]] = call align 16 dereferenceable_or_null(4) ptr @__kmpc_alloc_shared(i64 4) #[[ATTR2]]
; CHECK-NEXT: store i32 [[TMP4]], ptr [[I_I_I]], align 16
-; CHECK-NEXT: store ptr [[I_I_I]], ptr [[CAPTURED_VARS_ADDRS_I_I]], align 8
-; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
+; CHECK-NEXT: [[TMP7:%.*]] = addrspacecast ptr [[CAPTURED_VARS_ADDRS_I_I]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[I_I_I]], ptr addrspace(5) [[TMP7]], align 8
+; CHECK-NEXT: call void @__kmpc_parallel_51(ptr nonnull @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, ptr nonnull @__omp_outlined__1, ptr nonnull @__omp_outlined__1_wrapper, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]], i64 1)
; CHECK-NEXT: call void @__kmpc_free_shared(ptr [[I_I_I]], i64 4) #[[ATTR2]]
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[CAPTURED_VARS_ADDRS_I_I]])
; CHECK-NEXT: ret void
@@ -287,7 +294,8 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #5 {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
; CHECK-NEXT: call void @__kmpc_get_shared_variables(ptr nonnull [[GLOBAL_ARGS]])
-; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[GLOBAL_ARGS]] to ptr addrspace(5)
+; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspace(5) [[TMP5]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
; CHECK-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP4]], 1
diff --git a/llvm/test/Transforms/OpenMP/remove_globalization.ll b/llvm/test/Transforms/OpenMP/remove_globalization.ll
index 29f2030c4d42b..9966c33f0c91b 100644
--- a/llvm/test/Transforms/OpenMP/remove_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/remove_globalization.ll
@@ -163,17 +163,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-NEXT: br label [[INITLOOP:%.*]]
; CHECK: initloop:
-; CHECK-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: br label [[LOOPBODY:%.*]]
; CHECK: loopbody:
-; CHECK-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK: loopinc:
; CHECK-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-NEXT: br label [[LOOPBODY]]
; CHECK: exit:
; CHECK-NEXT: ret void
@@ -183,17 +188,22 @@ define internal void @convert_and_move_alloca() {
; CHECK-DISABLED-NEXT: entry:
; CHECK-DISABLED-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 4
; CHECK-DISABLED-NEXT: [[IV_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[UB_PTR:%.*]] = alloca i32, align 4
+; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[UB_PTR]] to ptr addrspace(5)
; CHECK-DISABLED-NEXT: br label [[INITLOOP:%.*]]
; CHECK-DISABLED: initloop:
-; CHECK-DISABLED-NEXT: store i32 0, ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 0, ptr addrspace(5) [[TMP1]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY:%.*]]
; CHECK-DISABLED: loopbody:
-; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr [[IV_PTR]], align 4
-; CHECK-DISABLED-NEXT: [[TMP0:%.*]] = icmp eq i32 [[IV]], 10
-; CHECK-DISABLED-NEXT: br i1 [[TMP0]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
+; CHECK-DISABLED-NEXT: [[TMP2:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: [[IV:%.*]] = load i32, ptr addrspace(5) [[TMP2]], align 4
+; CHECK-DISABLED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[IV]], 10
+; CHECK-DISABLED-NEXT: br i1 [[TMP3]], label [[EXIT:%.*]], label [[LOOPINC:%.*]]
; CHECK-DISABLED: loopinc:
; CHECK-DISABLED-NEXT: [[INC:%.*]] = add i32 [[IV]], 1
-; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr [[IV_PTR]], align 4
+; CHECK-DISABLED-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[IV_PTR]] to ptr addrspace(5)
+; CHECK-DISABLED-NEXT: store i32 [[INC]], ptr addrspace(5) [[TMP4]], align 4
; CHECK-DISABLED-NEXT: br label [[LOOPBODY]]
; CHECK-DISABLED: exit:
; CHECK-DISABLED-NEXT: ret void
@@ -268,7 +278,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK-DISABLED: attributes #[[ATTR6]] = { nounwind }
;.
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK: [[META2]] = !{}
; CHECK: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
@@ -279,7 +289,7 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
; CHECK: [[META9]] = !DISubroutineType(types: [[META2]])
;.
; CHECK-DISABLED: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None)
-; CHECK-DISABLED: [[META1]] = !DIFile(filename: "remove_globalization.c", directory: {{.*}})
+; CHECK-DISABLED: [[META1]] = !DIFile(filename: "{{.*}}remove_globalization.c", directory: {{.*}})
; CHECK-DISABLED: [[META2]] = !{}
; CHECK-DISABLED: [[META3:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
; CHECK-DISABLED: [[META4:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
index 809fc39442f91..d057e5b233e87 100644
--- a/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization_guarding.ll
@@ -71,28 +71,29 @@ define weak ptx_kernel void @__omp_offloading_2a_fbfa7a_sequential_loop_l6(ptr %
; CHECK: user_code.entry:
; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[N]], 42
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[C]], ptr [[AL32]], ptr addrspacecast (ptr addrspace(5) @LocGlob to ptr)
-; CHECK-NEXT: store ptr [[SELECT]], ptr [[LOC]], align 8
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOC]] to ptr addrspace(5)
+; CHECK-NEXT: store ptr [[SELECT]], ptr addrspace(5) [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr nonnull @[[GLOB1]]) #[[ATTR6]]
; CHECK-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 1
; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[N]], 32
; CHECK-NEXT: [[IDXPROM_I:%.*]] = ashr exact i64 [[SEXT]], 32
; CHECK-NEXT: [[ARRAYIDX2_I:%.*]] = getelementptr inbounds i32, ptr [[X]], i64 [[IDXPROM_I]]
; CHECK-NEXT: br label [[REGION_CHECK_TID:%.*]]
; CHECK: region.check.tid:
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[TMP3]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[TMP4]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
; CHECK: region.guarded:
; CHECK-NEXT: store i32 0, ptr [[X]], align 4, !noalias [[META7:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP4]], align 4, !noalias [[META7]]
-; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
-; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP5:%.*]] = addrspacecast ptr [[ARRAYIDX1_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP5]], align 4, !noalias [[META7]]
+; CHECK-NEXT: [[TMP6:%.*]] = addrspacecast ptr [[ARRAYIDX2_I]] to ptr addrspace(1)
+; CHECK-NEXT: store i32 [[N_ADDR_SROA_0_0_EXTRACT_TRUNC]], ptr addrspace(1) [[TMP6]], align 4, !noalias [[META7]]
; CHECK-NEXT: br label [[REGION_GUARDED_END:%.*]]
; CHECK: region.guarded.end:
; CHECK-NEXT: br label [[REGION_BARRIER]]
; CHECK: region.barrier:
-; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP2]])
+; CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(ptr @[[GLOB2]], i32 [[TMP3]])
; CHECK-NEXT: br label [[REGION_EXIT:%.*]]
; CHECK: region.exit:
; CHECK-NEXT: call void @usei8ptr(ptr captures(none) [[HEAP2STACK_H2S]]) #[[ATTR9:[0-9]+]]
@@ -108,19 +109,20 @@ def...
[truncated]
|
aabf3ca
to
c507626
Compare
If the underlying object already has a non-flat address space, we simply use that before calling `getAssumedAddrSpace`.
c507626
to
97993c7
Compare
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/144/builds/27180 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/6951 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/133/builds/17501 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/24659 Here is the relevant piece of the build log for the reference
|
The test failures have been fixed in 64bd4d9. |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/16898 Here is the relevant piece of the build log for the reference
|
…#143221) If the underlying object already has a non-flat address space, we simply use that before calling `getAssumedAddrSpace`. Partially fixes SWDEV-536263.
…#143221) If the underlying object already has a non-flat address space, we simply use that before calling `getAssumedAddrSpace`. Partially fixes SWDEV-536263.
…#143221) If the underlying object already has a non-flat address space, we simply use that before calling `getAssumedAddrSpace`. Partially fixes SWDEV-536263.
If the underlying object already has a non-flat address space, we simply use
that before calling
getAssumedAddrSpace
.Partially fixes SWDEV-536263.