Skip to content

Commit 43a91a8

Browse files
committed
[SelectionDAG] Don't create illegally-typed nodes while constant folding
This patch fixes a (seemingly very rare) crash during vector constant folding introduced in D113300. Normally, during legalization, if we create an illegally-typed node during a failed attempt at constant folding it's cleaned up before being visited, due to it having no uses. If, however, an illegally-typed node is created during one round of legalization and isn't cleaned up, it's possible for a second round of legalization to create new illegally-typed nodes which add extra uses to the old illegal nodes. This means that we can end up visiting the old nodes before they're known to be dead, at which point we crash. I'm not happy about this fix. Creating illegal types at all seems like a bad idea, but we all-too-often rely on illegal constants being successfully folded and being fixed up afterwards. However, we can't rely on constant folding actually happening, and we don't have a foolproof way of peering into the future. Perhaps the correct fix is to revisit the node-iteration order during legalization, ensuring we visit all uses of nodes before the nodes themselves. Or alternatively we could try and clean up dead nodes immediately after failing constant folding. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D122382
1 parent 8a4077f commit 43a91a8

File tree

2 files changed

+96
-1
lines changed

2 files changed

+96
-1
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5611,8 +5611,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
56115611

56125612
// Build vector (integer) scalar operands may need implicit
56135613
// truncation - do this before constant folding.
5614-
if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
5614+
if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
5615+
// Don't create illegally-typed nodes unless they're constants or undef
5616+
// - if we fail to constant fold we can't guarantee the (dead) nodes
5617+
// we're creating will be cleaned up before being visited for
5618+
// legalization.
5619+
if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
5620+
!isa<ConstantSDNode>(ScalarOp) &&
5621+
TLI->getTypeAction(*getContext(), InSVT) !=
5622+
TargetLowering::TypeLegal)
5623+
return SDValue();
56155624
ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
5625+
}
56165626

56175627
ScalarOps.push_back(ScalarOp);
56185628
}
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s --check-prefix RV32
4+
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
5+
; RUN: | FileCheck %s --check-prefix RV64
6+
7+
; This used to crash during type legalization, where lowering (v4i1 =
8+
; BUILD_VECTOR) created a (v4i1 = SETCC v4i8) which during constant-folding
9+
; created illegally-typed i8 nodes. Ultimately, constant-folding failed and so
10+
; the new illegal nodes had no uses. However, during a second round of
11+
; legalization, this same pattern was generated from another BUILD_VECTOR. This
12+
; meant one of the illegally-typed (i8 = Constant<0>) nodes now had two dead
13+
; uses. Because the Constant and one of the uses were from round 1, they were
14+
; further up in the node order than the new second use, so the constant was
15+
; visited while it wasn't "dead". At the point of visiting the constant, we
16+
; crashed.
17+
18+
define void @constant_folding_crash(i8* %v54, <4 x <4 x i32>*> %lanes.a, <4 x <4 x i32>*> %lanes.b, <4 x i1> %sel) {
19+
; RV32-LABEL: constant_folding_crash:
20+
; RV32: # %bb.0: # %entry
21+
; RV32-NEXT: lw a0, 8(a0)
22+
; RV32-NEXT: vmv1r.v v10, v0
23+
; RV32-NEXT: andi a0, a0, 1
24+
; RV32-NEXT: seqz a0, a0
25+
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
26+
; RV32-NEXT: vmv.v.x v11, a0
27+
; RV32-NEXT: vmsne.vi v0, v11, 0
28+
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
29+
; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
30+
; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
31+
; RV32-NEXT: vmv.v.i v9, 0
32+
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
33+
; RV32-NEXT: vmv.x.s a0, v8
34+
; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
35+
; RV32-NEXT: vmv1r.v v0, v10
36+
; RV32-NEXT: vmerge.vim v8, v9, 1, v0
37+
; RV32-NEXT: vmv.x.s a1, v8
38+
; RV32-NEXT: andi a1, a1, 1
39+
; RV32-NEXT: vmv.v.x v8, a1
40+
; RV32-NEXT: vmsne.vi v0, v8, 0
41+
; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu
42+
; RV32-NEXT: vmv.v.i v8, 10
43+
; RV32-NEXT: vse32.v v8, (a0), v0.t
44+
; RV32-NEXT: ret
45+
;
46+
; RV64-LABEL: constant_folding_crash:
47+
; RV64: # %bb.0: # %entry
48+
; RV64-NEXT: ld a0, 8(a0)
49+
; RV64-NEXT: vmv1r.v v12, v0
50+
; RV64-NEXT: andi a0, a0, 1
51+
; RV64-NEXT: seqz a0, a0
52+
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
53+
; RV64-NEXT: vmv.v.x v13, a0
54+
; RV64-NEXT: vmsne.vi v0, v13, 0
55+
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
56+
; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
57+
; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, mu
58+
; RV64-NEXT: vmv.v.i v10, 0
59+
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, mu
60+
; RV64-NEXT: vmv.x.s a0, v8
61+
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
62+
; RV64-NEXT: vmv1r.v v0, v12
63+
; RV64-NEXT: vmerge.vim v8, v10, 1, v0
64+
; RV64-NEXT: vmv.x.s a1, v8
65+
; RV64-NEXT: andi a1, a1, 1
66+
; RV64-NEXT: vmv.v.x v8, a1
67+
; RV64-NEXT: vmsne.vi v0, v8, 0
68+
; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, mu
69+
; RV64-NEXT: vmv.v.i v8, 10
70+
; RV64-NEXT: vse32.v v8, (a0), v0.t
71+
; RV64-NEXT: ret
72+
entry:
73+
%sunkaddr = getelementptr i8, i8* %v54, i64 8
74+
%v55 = bitcast i8* %sunkaddr to i64*
75+
%v56 = load i64, i64* %v55, align 8
76+
%trunc = and i64 %v56, 1
77+
%cmp = icmp eq i64 %trunc, 0
78+
%ptrs = select i1 %cmp, <4 x <4 x i32>*> %lanes.a, <4 x <4 x i32>*> %lanes.b
79+
%v67 = extractelement <4 x <4 x i32>*> %ptrs, i64 0
80+
%mask = shufflevector <4 x i1> %sel, <4 x i1> undef, <4 x i32> zeroinitializer
81+
call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32>* %v67, i32 16, <4 x i1> %mask)
82+
ret void
83+
}
84+
85+
declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)

0 commit comments

Comments
 (0)