Skip to content

Commit dc29901

Browse files
ro-i and arsenm authored
[AMDGPU] PromoteAlloca: handle out-of-bounds GEP for shufflevector (#139700)
This LLVM defect was identified via the AMD Fuzzing project.

---------

Co-authored-by: Matt Arsenault <[email protected]>
1 parent d360281 commit dc29901

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,7 +666,9 @@ static Value *promoteAllocaUserToVector(
666666
SmallVector<int> Mask;
667667
for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
668668
if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
669-
Mask.push_back(SrcBegin++);
669+
Mask.push_back(SrcBegin < VectorTy->getNumElements()
670+
? SrcBegin++
671+
: PoisonMaskElem);
670672
} else {
671673
Mask.push_back(Idx);
672674
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple amdgcn -passes=amdgpu-promote-alloca-to-vector -S < %s | FileCheck %s
3+
4+
; Check that promote-alloca still vectorizes a memcpy whose source GEP index is (partially) out of bounds, emitting poison shuffle-mask elements for the out-of-bounds lanes.
5+
6+
define amdgpu_kernel void @out_of_bounds() {
7+
; CHECK-LABEL: define amdgpu_kernel void @out_of_bounds() {
8+
; CHECK-NEXT: [[PTR:%.*]] = freeze <4 x float> poison
9+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[PTR]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 2, i32 3>
10+
; CHECK-NEXT: ret void
11+
;
12+
%ptr = alloca [4 x float], align 4, addrspace(5)
13+
%elem_ptr = getelementptr [4 x float], ptr addrspace(5) %ptr, i32 0, i32 42
14+
call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) %ptr, ptr addrspace(5) %elem_ptr, i32 8, i1 false)
15+
ret void
16+
}
17+
18+
define amdgpu_kernel void @memcpy_partially_out_of_bounds() {
19+
; CHECK-LABEL: define amdgpu_kernel void @memcpy_partially_out_of_bounds() {
20+
; CHECK-NEXT: [[PTR:%.*]] = freeze <3 x float> poison
21+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x float> [[PTR]], <3 x float> poison, <3 x i32> <i32 2, i32 poison, i32 2>
22+
; CHECK-NEXT: ret void
23+
;
24+
%ptr = alloca [3 x float], align 4, addrspace(5)
25+
%elem_ptr = getelementptr [3 x float], ptr addrspace(5) %ptr, i32 0, i32 2
26+
call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) %ptr, ptr addrspace(5) %elem_ptr, i32 8, i1 false)
27+
ret void
28+
}
29+
30+
define amdgpu_kernel void @in_bounds() {
31+
; CHECK-LABEL: define amdgpu_kernel void @in_bounds() {
32+
; CHECK-NEXT: [[PTR:%.*]] = freeze <4 x float> poison
33+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[PTR]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
34+
; CHECK-NEXT: ret void
35+
;
36+
%ptr = alloca [4 x float], align 4, addrspace(5)
37+
%elem_ptr = getelementptr [4 x float], ptr addrspace(5) %ptr, i32 0, i32 2
38+
call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) %ptr, ptr addrspace(5) %elem_ptr, i32 8, i1 false)
39+
ret void
40+
}
41+
42+
declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) writeonly captures(none), ptr addrspace(5) readonly captures(none), i32, i1 immarg) #0
43+
44+
attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }

0 commit comments

Comments
 (0)