[SelectionDAG] Don't create illegally-typed nodes while constant folding

frasercrmck · frasercrmck · commit 43a91a8474f5 · 2022-03-30T13:17:55.000+01:00
This patch fixes a (seemingly very rare) crash during vector constant folding introduced in D113300. Normally, during legalization, if we create an illegally-typed node during a failed attempt at constant folding it's cleaned up before being visited, due to it having no uses. If, however, an illegally-typed node is created during one round of legalization and isn't cleaned up, it's possible for a second round of legalization to create new illegally-typed nodes which add extra uses to the old illegal nodes. This means that we can end up visiting the old nodes before they're known to be dead, at which point we crash. I'm not happy about this fix. Creating illegal types at all seems like a bad idea, but we all-too-often rely on illegal constants being successfully folded and being fixed up afterwards. However, we can't rely on constant folding actually happening, and we don't have a foolproof way of peering into the future. Perhaps the correct fix is to revisit the node-iteration order during legalization, ensuring we visit all uses of nodes before the nodes themselves. Or alternatively we could try and clean up dead nodes immediately after failing constant folding. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D122382
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5611,8 +5611,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
 
       // Build vector (integer) scalar operands may need implicit
       // truncation - do this before constant folding.
-      if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT))
+      if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) {
+        // Don't create illegally-typed nodes unless they're constants or undef
+        // - if we fail to constant fold we can't guarantee the (dead) nodes
+        // we're creating will be cleaned up before being visited for
+        // legalization.
+        if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() &&
+            !isa<ConstantSDNode>(ScalarOp) &&
+            TLI->getTypeAction(*getContext(), InSVT) !=
+                TargetLowering::TypeLegal)
+          return SDValue();
         ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp);
+      }
 
       ScalarOps.push_back(ScalarOp);
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix RV64
+
+; This used to crash during type legalization, where lowering (v4i1 =
+; BUILD_VECTOR) created a (v4i1 = SETCC v4i8) which during constant-folding
+; created illegally-typed i8 nodes. Ultimately, constant-folding failed and so
+; the new illegal nodes had no uses. However, during a second round of
+; legalization, this same pattern was generated from another BUILD_VECTOR. This
+; meant one of the illegally-typed (i8 = Constant<0>) nodes now had two dead
+; uses. Because the Constant and one of the uses were from round 1, they were
+; further up in the node order than the new second use, so the constant was
+; visited while it wasn't "dead". At the point of visiting the constant, we
+; crashed.
+
+define void @constant_folding_crash(i8* %v54, <4 x <4 x i32>*> %lanes.a, <4 x <4 x i32>*> %lanes.b, <4 x i1> %sel) {
+; RV32-LABEL: constant_folding_crash:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lw a0, 8(a0)
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    andi a0, a0, 1
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV32-NEXT:    vmv.v.x v11, a0
+; RV32-NEXT:    vmsne.vi v0, v11, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; RV32-NEXT:    vmerge.vvm v8, v9, v8, v0
+; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; RV32-NEXT:    vmv.v.i v9, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; RV32-NEXT:    vmv.x.s a0, v8
+; RV32-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vmerge.vim v8, v9, 1, v0
+; RV32-NEXT:    vmv.x.s a1, v8
+; RV32-NEXT:    andi a1, a1, 1
+; RV32-NEXT:    vmv.v.x v8, a1
+; RV32-NEXT:    vmsne.vi v0, v8, 0
+; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; RV32-NEXT:    vmv.v.i v8, 10
+; RV32-NEXT:    vse32.v v8, (a0), v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: constant_folding_crash:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    ld a0, 8(a0)
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    andi a0, a0, 1
+; RV64-NEXT:    seqz a0, a0
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV64-NEXT:    vmv.v.x v13, a0
+; RV64-NEXT:    vmsne.vi v0, v13, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmerge.vvm v8, v10, v8, v0
+; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vmv.v.i v10, 0
+; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV64-NEXT:    vmv.x.s a0, v8
+; RV64-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vmerge.vim v8, v10, 1, v0
+; RV64-NEXT:    vmv.x.s a1, v8
+; RV64-NEXT:    andi a1, a1, 1
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vmsne.vi v0, v8, 0
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
+; RV64-NEXT:    vmv.v.i v8, 10
+; RV64-NEXT:    vse32.v v8, (a0), v0.t
+; RV64-NEXT:    ret
+entry:
+  %sunkaddr = getelementptr i8, i8* %v54, i64 8
+  %v55 = bitcast i8* %sunkaddr to i64*
+  %v56 = load i64, i64* %v55, align 8
+  %trunc = and i64 %v56, 1
+  %cmp = icmp eq i64 %trunc, 0
+  %ptrs = select i1 %cmp, <4 x <4 x i32>*> %lanes.a, <4 x <4 x i32>*> %lanes.b
+  %v67 = extractelement <4 x <4 x i32>*> %ptrs, i64 0
+  %mask = shufflevector <4 x i1> %sel, <4 x i1> undef, <4 x i32> zeroinitializer
+  call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32>* %v67, i32 16, <4 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.masked.store.v4i32.p0v4i32(<4 x i32>, <4 x i32>*, i32, <4 x i1>)