Skip to content

Commit cf188d6

Browse files
authored
[AMDGPU] Avoid crashes for non-byte-sized types in PromoteAlloca (#134042)
This patch addresses three problems when promoting allocas to vectors: - Element types with size < 1 byte in allocas with a vector type caused divisions by zero. - Element types whose size doesn't match their AllocSize hit an assertion. - Access types whose size doesn't match their AllocSize hit an assertion. With this patch, we do not attempt to promote affected allocas to vectors. In principle, we could handle these cases in PromoteAlloca, e.g., by truncating and extending elements from/to their allocation size. It's however unclear if we ever encounter such cases in practice, so that doesn't seem worth the added complexity. For SWDEV-511252
1 parent 150e7b1 commit cf188d6

File tree

2 files changed

+228
-10
lines changed

2 files changed

+228
-10
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,11 @@ static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
729729
// complicated.
730730
if (isa<FixedVectorType>(AccessTy)) {
731731
TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
732+
// If the type size and the store size don't match, we would need to do more
733+
// than just bitcast to translate between an extracted/insertable subvectors
734+
// and the accessed value.
735+
if (AccTS * 8 != DL.getTypeSizeInBits(AccessTy))
736+
return false;
732737
TypeSize VecTS = DL.getTypeStoreSize(VecTy->getElementType());
733738
return AccTS.isKnownMultipleOf(VecTS);
734739
}
@@ -813,15 +818,17 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
813818

814819
if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
815820
unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
816-
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
817-
// Expand vector if required to match padding of inner type,
818-
// i.e. odd size subvectors.
819-
// Storage size of new vector must match that of alloca for correct
820-
// behaviour of byte offsets and GEP computation.
821-
if (NumElems * ElementSize != AllocaSize)
822-
NumElems = AllocaSize / ElementSize;
823-
if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
824-
VectorTy = FixedVectorType::get(ElemTy, NumElems);
821+
if (ElementSize > 0) {
822+
unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
823+
// Expand vector if required to match padding of inner type,
824+
// i.e. odd size subvectors.
825+
// Storage size of new vector must match that of alloca for correct
826+
// behaviour of byte offsets and GEP computation.
827+
if (NumElems * ElementSize != AllocaSize)
828+
NumElems = AllocaSize / ElementSize;
829+
if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
830+
VectorTy = FixedVectorType::get(ElemTy, NumElems);
831+
}
825832
}
826833
}
827834

@@ -861,7 +868,14 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
861868
LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
862869

863870
Type *VecEltTy = VectorTy->getElementType();
864-
unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
871+
unsigned ElementSizeInBits = DL->getTypeSizeInBits(VecEltTy);
872+
if (ElementSizeInBits != DL->getTypeAllocSizeInBits(VecEltTy)) {
873+
LLVM_DEBUG(dbgs() << " Cannot convert to vector if the allocation size "
874+
"does not match the type's size\n");
875+
return false;
876+
}
877+
unsigned ElementSize = ElementSizeInBits / 8;
878+
assert(ElementSize > 0);
865879
for (auto *U : Uses) {
866880
Instruction *Inst = cast<Instruction>(U->getUser());
867881

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
3+
4+
; Check that types where the store/allocation sizes don't match the type size
5+
; don't crash.
6+
7+
8+
define <7 x i9> @load_elem_i9_access_7xi9() {
9+
; CHECK-LABEL: @load_elem_i9_access_7xi9(
10+
; CHECK-NEXT: [[P:%.*]] = alloca <16 x i9>, align 1, addrspace(5)
11+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
12+
; CHECK-NEXT: [[L:%.*]] = load <7 x i9>, ptr addrspace(5) [[G]], align 1
13+
; CHECK-NEXT: ret <7 x i9> [[L]]
14+
;
15+
%p = alloca <16 x i9>, align 1, addrspace(5)
16+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
17+
%l = load <7 x i9>, ptr addrspace(5) %g, align 1
18+
ret <7 x i9> %l
19+
}
20+
21+
define <8 x i1> @load_elem_i1_access_8xi1() {
22+
; CHECK-LABEL: @load_elem_i1_access_8xi1(
23+
; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
24+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
25+
; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
26+
; CHECK-NEXT: ret <8 x i1> [[L]]
27+
;
28+
%p = alloca <16 x i1>, align 1, addrspace(5)
29+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
30+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
31+
ret <8 x i1> %l
32+
}
33+
34+
define <3 x i1> @load_elem_i1_access_3xi1() {
35+
; CHECK-LABEL: @load_elem_i1_access_3xi1(
36+
; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
37+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
38+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
39+
; CHECK-NEXT: ret <3 x i1> [[L]]
40+
;
41+
%p = alloca <16 x i1>, align 1, addrspace(5)
42+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
43+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
44+
ret <3 x i1> %l
45+
}
46+
47+
define <3 x i1> @load_elem_i8_access_3xi1() {
48+
; CHECK-LABEL: @load_elem_i8_access_3xi1(
49+
; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5)
50+
; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
51+
; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
52+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
53+
; CHECK-NEXT: ret <3 x i1> [[L]]
54+
;
55+
%p = alloca <8 x i8>, align 1, addrspace(5)
56+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
57+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
58+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
59+
ret <3 x i1> %l
60+
}
61+
62+
; This one is actually not problematic.
63+
define <8 x i1> @load_elem_i8_access_8xi1() {
64+
; CHECK-LABEL: @load_elem_i8_access_8xi1(
65+
; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison
66+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
67+
;
68+
%p = alloca <8 x i8>, align 1, addrspace(5)
69+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
70+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
71+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
72+
ret <8 x i1> %l
73+
}
74+
75+
define <8 x i1> @storeload_elem_i1_access_8xi1() {
76+
; CHECK-LABEL: @storeload_elem_i1_access_8xi1(
77+
; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
78+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
79+
; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1
80+
; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
81+
; CHECK-NEXT: ret <8 x i1> [[L]]
82+
;
83+
%p = alloca <16 x i1>, align 1, addrspace(5)
84+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
85+
store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
86+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
87+
ret <8 x i1> %l
88+
}
89+
90+
define <3 x i1> @storeload_elem_i1_access_3xi1() {
91+
; CHECK-LABEL: @storeload_elem_i1_access_3xi1(
92+
; CHECK-NEXT: [[P:%.*]] = alloca <16 x i1>, align 1, addrspace(5)
93+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
94+
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
95+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
96+
; CHECK-NEXT: ret <3 x i1> [[L]]
97+
;
98+
%p = alloca <16 x i1>, align 1, addrspace(5)
99+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
100+
store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
101+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
102+
ret <3 x i1> %l
103+
}
104+
105+
define <3 x i1> @storeload_elem_i8_access_3xi1() {
106+
; CHECK-LABEL: @storeload_elem_i8_access_3xi1(
107+
; CHECK-NEXT: [[P:%.*]] = alloca <8 x i8>, align 1, addrspace(5)
108+
; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
109+
; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
110+
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
111+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
112+
; CHECK-NEXT: ret <3 x i1> [[L]]
113+
;
114+
%p = alloca <8 x i8>, align 1, addrspace(5)
115+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
116+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
117+
store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
118+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
119+
ret <3 x i1> %l
120+
}
121+
122+
; This one is actually not problematic.
123+
define <8 x i1> @storeload_elem_i8_access_8xi1() {
124+
; CHECK-LABEL: @storeload_elem_i8_access_8xi1(
125+
; CHECK-NEXT: [[P:%.*]] = freeze <8 x i8> poison
126+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
127+
;
128+
%p = alloca <8 x i8>, align 1, addrspace(5)
129+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
130+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
131+
store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
132+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
133+
ret <8 x i1> %l
134+
}
135+
136+
define <8 x i1> @array_of_vec_elem_i1_access_8xi1() {
137+
; CHECK-LABEL: @array_of_vec_elem_i1_access_8xi1(
138+
; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5)
139+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
140+
; CHECK-NEXT: store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) [[G]], align 1
141+
; CHECK-NEXT: [[L:%.*]] = load <8 x i1>, ptr addrspace(5) [[G]], align 1
142+
; CHECK-NEXT: ret <8 x i1> [[L]]
143+
;
144+
%p = alloca [2 x <16 x i1>], align 1, addrspace(5)
145+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
146+
store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
147+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
148+
ret <8 x i1> %l
149+
}
150+
151+
define <3 x i1> @array_of_vec_elem_i1_access_3xi1() {
152+
; CHECK-LABEL: @array_of_vec_elem_i1_access_3xi1(
153+
; CHECK-NEXT: [[P:%.*]] = alloca [2 x <16 x i1>], align 1, addrspace(5)
154+
; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr addrspace(5) [[P]], i64 4
155+
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
156+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
157+
; CHECK-NEXT: ret <3 x i1> [[L]]
158+
;
159+
%p = alloca [2 x <16 x i1>], align 1, addrspace(5)
160+
%g = getelementptr i8, ptr addrspace(5) %p, i64 4
161+
store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
162+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
163+
ret <3 x i1> %l
164+
}
165+
166+
define <3 x i1> @array_of_vec_elem_i8_access_3xi1() {
167+
; CHECK-LABEL: @array_of_vec_elem_i8_access_3xi1(
168+
; CHECK-NEXT: [[P:%.*]] = alloca [2 x <8 x i8>], align 1, addrspace(5)
169+
; CHECK-NEXT: store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) [[P]], align 1
170+
; CHECK-NEXT: [[G:%.*]] = getelementptr <4 x i8>, ptr addrspace(5) [[P]], i64 1
171+
; CHECK-NEXT: store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) [[G]], align 1
172+
; CHECK-NEXT: [[L:%.*]] = load <3 x i1>, ptr addrspace(5) [[G]], align 1
173+
; CHECK-NEXT: ret <3 x i1> [[L]]
174+
;
175+
%p = alloca [2 x <8 x i8>], align 1, addrspace(5)
176+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
177+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
178+
store <3 x i1> <i1 true, i1 false, i1 true>, ptr addrspace(5) %g, align 1
179+
%l = load <3 x i1>, ptr addrspace(5) %g, align 1
180+
ret <3 x i1> %l
181+
}
182+
183+
; This one is actually not problematic.
184+
define <8 x i1> @array_of_vec_elem_i8_access_8xi1() {
185+
; CHECK-LABEL: @array_of_vec_elem_i8_access_8xi1(
186+
; CHECK-NEXT: [[P:%.*]] = freeze <16 x i8> poison
187+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> [[P]], i8 1, i32 0
188+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i8> [[TMP1]], i8 2, i32 1
189+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i8> [[TMP2]], i8 3, i32 2
190+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i8> [[TMP3]], i8 4, i32 3
191+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i8> [[TMP4]], i8 5, i32 4
192+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i8> [[TMP5]], i8 6, i32 5
193+
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i8> [[TMP6]], i8 7, i32 6
194+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i8> [[TMP7]], i8 8, i32 7
195+
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i8> [[TMP8]], i8 5, i32 4
196+
; CHECK-NEXT: ret <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>
197+
;
198+
%p = alloca [2 x <8 x i8>], align 1, addrspace(5)
199+
store <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>, ptr addrspace(5) %p, align 1
200+
%g = getelementptr <4 x i8>, ptr addrspace(5) %p, i64 1
201+
store <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, ptr addrspace(5) %g, align 1
202+
%l = load <8 x i1>, ptr addrspace(5) %g, align 1
203+
ret <8 x i1> %l
204+
}

0 commit comments

Comments
 (0)