Skip to content

Commit 0b7ff03

Browse files
committed
[instCombine][bugfix] Fix crash caused by unchecked use of cast in instCombineSVECmpNE.
The function instCombineSVECmpNE identifies a specific pattern of the 'svecmpne' instruction, predicts its result, and then uses that result to replace the 'svecmpne' instruction. The specific pattern can be described as follows: 1. The svecmpne must compare all elements of the vector. 2. The svecmpne instruction compares its vector with zero. 3. The vector in the svecmpne instruction is generated by a dupqlane instruction, and the lane index (op1) of this dupqlane must be zero. For item 3 above, instCombineSVECmpNE uses 'cast' to convert op1 of the dupqlane to a ConstantInt without checking whether the cast succeeds, which causes a crash in some situations (for example, when op1 is not a constant).
1 parent 26ca782 commit 0b7ff03

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1174,7 +1174,8 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
11741174
return std::nullopt;
11751175

11761176
// Where the dupq is a lane 0 replicate of a vector insert
1177-
if (!cast<ConstantInt>(DupQLane->getArgOperand(1))->isZero())
1177+
auto *DupQLaneOp1 = dyn_cast<ConstantInt>(DupQLane->getArgOperand(1));
1178+
if (!DupQLaneOp1 || !DupQLaneOp1->isZero())
11781179
return std::nullopt;
11791180

11801181
auto *VecIns = dyn_cast<IntrinsicInst>(DupQLane->getArgOperand(0));
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; RUN: opt -S -mtriple=aarch64-unknown-linux-gnu -O2 < %s | FileCheck %s
2+
3+
; Function Attrs: nofree nosync nounwind readnone uwtable vscale_range(1,16)
4+
define dso_local i32 @testInstCombineSVECmpNE() local_unnamed_addr #0 {
5+
entry:
6+
%0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8 42, i8 1)
7+
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
8+
br label %for.body
9+
10+
for.cond.cleanup: ; preds = %for.inc
11+
%2 = tail call i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %cmp_rslt.1)
12+
%not. = xor i1 %2, true
13+
%. = zext i1 %not. to i32
14+
ret i32 %.
15+
16+
for.body: ; preds = %entry, %for.inc
17+
%i.010 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
18+
%cmp1 = icmp ugt i64 %i.010, 32
19+
%3 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %0, i64 %i.010)
20+
br i1 %cmp1, label %if.then, label %if.else
21+
22+
if.then: ; preds = %for.body
23+
%4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %3, <vscale x 16 x i8> zeroinitializer)
24+
br label %for.inc
25+
; CHECK: %4 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %3, <vscale x 16 x i8> zeroinitializer)
26+
; CHECK-NEXT: br label %for.inc
27+
28+
if.else: ; preds = %for.body
29+
%5 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %3, <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
30+
br label %for.inc
31+
32+
for.inc: ; preds = %if.then, %if.else
33+
%cmp_rslt.1 = phi <vscale x 16 x i1> [ %4, %if.then ], [ %5, %if.else ]
34+
%inc = add nuw nsw i64 %i.010, 1
35+
%exitcond.not = icmp eq i64 %inc, 63
36+
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !6
37+
}
38+
39+
; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone willreturn
40+
declare <vscale x 16 x i8> @llvm.aarch64.sve.index.nxv16i8(i8, i8) #1
41+
42+
; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone willreturn
43+
declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 immarg) #1
44+
45+
; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone willreturn
46+
declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64) #1
47+
48+
; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone willreturn
49+
declare <vscale x 16 x i1> @llvm.aarch64.sve.cmpne.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>) #1
50+
51+
; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone willreturn
52+
declare i1 @llvm.aarch64.sve.ptest.any.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>) #1
53+
54+
attributes #0 = { nofree nosync nounwind readnone uwtable vscale_range(1,16) "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon,+sve,+v8.2a" }
55+
attributes #1 = { mustprogress nocallback nofree nosync nounwind readnone willreturn }
56+
57+
!6 = distinct !{!6, !7}
58+
!7 = !{!"llvm.loop.mustprogress"}

0 commit comments

Comments
 (0)