Skip to content

Commit beda9d0

Browse files
committed
AMDGPU: Skip GetUnderlyingObject check in pointsToConstantMemory
Check the address space of the pointer itself before searching for the underlying object definition, to save compile time. As an added bonus, casts to a constant address space are now treated as constant. We also seemed to be missing targeted tests for this, so add tests for a few other missing cases too.
1 parent f8b09f7 commit beda9d0

File tree

2 files changed

+119
-3
lines changed

2 files changed

+119
-3
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -91,12 +91,16 @@ AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
9191

9292
bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
9393
AAQueryInfo &AAQI, bool OrLocal) {
94+
unsigned AS = Loc.Ptr->getType()->getPointerAddressSpace();
95+
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
96+
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
97+
return true;
98+
9499
const Value *Base = GetUnderlyingObject(Loc.Ptr, DL);
95-
unsigned AS = Base->getType()->getPointerAddressSpace();
100+
AS = Base->getType()->getPointerAddressSpace();
96101
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
97-
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
102+
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
98103
return true;
99-
}
100104

101105
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
102106
if (GV->isConstant())
Lines changed: 112 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,112 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -amdgpu-aa-wrapper -amdgpu-aa -instcombine -o - %s | FileCheck %s
3+
;
4+
; Test AA::pointsToConstantMemory implementation. These should mostly
5+
; turn out to be stores to constant memory, and will therefore be
6+
; deleted as UB.
7+
8+
define void @test_constant_addrspace(i8 addrspace(4)* %p) {
9+
; CHECK-LABEL: @test_constant_addrspace(
10+
; CHECK-NEXT: ret void
11+
;
12+
store i8 0, i8 addrspace(4)* %p
13+
ret void
14+
}
15+
16+
define void @test_constant32bit_addrspace(i8 addrspace(6)* %p) {
17+
; CHECK-LABEL: @test_constant32bit_addrspace(
18+
; CHECK-NEXT: ret void
19+
;
20+
store i8 0, i8 addrspace(6)* %p
21+
ret void
22+
}
23+
24+
define void @test_cast_generic_from_constant_addrspace(i8 addrspace(4)* %p) {
25+
; CHECK-LABEL: @test_cast_generic_from_constant_addrspace(
26+
; CHECK-NEXT: ret void
27+
;
28+
%cast = addrspacecast i8 addrspace(4)* %p to i8*
29+
store i8 0, i8* %cast
30+
ret void
31+
}
32+
33+
define void @test_cast_generic_from_constant32bit_addrspace(i8 addrspace(6)* %p) {
34+
; CHECK-LABEL: @test_cast_generic_from_constant32bit_addrspace(
35+
; CHECK-NEXT: ret void
36+
;
37+
%cast = addrspacecast i8 addrspace(6)* %p to i8*
38+
store i8 0, i8* %cast
39+
ret void
40+
}
41+
42+
define void @test_cast_generic_to_constant_addrspace(i8* %p) {
43+
; CHECK-LABEL: @test_cast_generic_to_constant_addrspace(
44+
; CHECK-NEXT: ret void
45+
;
46+
%cast = addrspacecast i8* %p to i8 addrspace(4)*
47+
store i8 0, i8 addrspace(4)* %cast
48+
ret void
49+
}
50+
51+
define void @test_cast_generic_to_constant32bit_addrspace(i8* %p) {
52+
; CHECK-LABEL: @test_cast_generic_to_constant32bit_addrspace(
53+
; CHECK-NEXT: ret void
54+
;
55+
%cast = addrspacecast i8* %p to i8 addrspace(6)*
56+
store i8 0, i8 addrspace(6)* %cast
57+
ret void
58+
}
59+
60+
define amdgpu_kernel void @noalias_readnone_global_kernarg(i32 addrspace(1)* noalias readnone %arg) {
61+
; CHECK-LABEL: @noalias_readnone_global_kernarg(
62+
; CHECK-NEXT: ret void
63+
;
64+
store i32 0, i32 addrspace(1)* %arg
65+
ret void
66+
}
67+
68+
define amdgpu_kernel void @noalias_readonly_global_kernarg(i32 addrspace(1)* noalias readonly %arg) {
69+
; CHECK-LABEL: @noalias_readonly_global_kernarg(
70+
; CHECK-NEXT: ret void
71+
;
72+
store i32 0, i32 addrspace(1)* %arg
73+
ret void
74+
}
75+
76+
define amdgpu_kernel void @readnone_global_kernarg(i32 addrspace(1)* readnone %arg) {
77+
; CHECK-LABEL: @readnone_global_kernarg(
78+
; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[ARG:%.*]], align 4
79+
; CHECK-NEXT: ret void
80+
;
81+
store i32 0, i32 addrspace(1)* %arg
82+
ret void
83+
}
84+
85+
define amdgpu_kernel void @readonly_global_kernarg(i32 addrspace(1)* readonly %arg) {
86+
; CHECK-LABEL: @readonly_global_kernarg(
87+
; CHECK-NEXT: store i32 0, i32 addrspace(1)* [[ARG:%.*]], align 4
88+
; CHECK-NEXT: ret void
89+
;
90+
store i32 0, i32 addrspace(1)* %arg
91+
ret void
92+
}
93+
94+
@global_as_constant = external addrspace(1) constant i32, align 4
95+
96+
define amdgpu_kernel void @constant_gv_global_as() {
97+
; CHECK-LABEL: @constant_gv_global_as(
98+
; CHECK-NEXT: ret void
99+
;
100+
store i32 0, i32 addrspace(1)* @global_as_constant
101+
ret void
102+
}
103+
104+
@global_nonconstant_constant_as = external addrspace(4) global i32, align 4
105+
106+
define amdgpu_kernel void @nonconst_gv_constant_as() {
107+
; CHECK-LABEL: @nonconst_gv_constant_as(
108+
; CHECK-NEXT: ret void
109+
;
110+
store i32 0, i32 addrspace(4)* @global_nonconstant_constant_as
111+
ret void
112+
}

0 commit comments

Comments
 (0)