Skip to content

Commit b8038a9

Browse files
committed
[WebAssembly] Disable SimplifyDemandedVectorElts after legalization
This fixes a reported bug that caused an infinite loop during the SelectionDAG optimization phase in ISel, by creating an overridable hook in `TargetLowering` that allows us to bail out from running `SimplifyDemandedVectorElts`. Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D121869
1 parent 0ca2132 commit b8038a9

File tree

5 files changed

+69
-0
lines changed

5 files changed

+69
-0
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3563,6 +3563,14 @@ class TargetLowering : public TargetLoweringBase {
35633563
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
35643564
DAGCombinerInfo &DCI) const;
35653565

3566+
/// Return true if the target supports simplifying demanded vector elements by
3567+
/// converting them to undefs.
/// The default implementation always returns true. When an override returns
/// false for \p Op, SimplifyDemandedVectorElts returns false early.
3568+
virtual bool
3569+
shouldSimplifyDemandedVectorElts(SDValue Op,
3570+
const TargetLoweringOpt &TLO) const {
3571+
return true;
3572+
}
3573+
35663574
/// Determine which of the bits specified in Mask are known to be either zero
35673575
/// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
35683576
/// argument allows us to only collect the known bits that are shared by the

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2640,6 +2640,10 @@ bool TargetLowering::SimplifyDemandedVectorElts(
26402640

26412641
KnownUndef = KnownZero = APInt::getZero(NumElts);
26422642

2643+
const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2644+
if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2645+
return false;
2646+
26432647
// TODO: For now we assume we know nothing about scalable vectors.
26442648
if (VT.isScalableVector())
26452649
return false;

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,30 @@ WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
911911
return TargetLoweringBase::getPreferredVectorAction(VT);
912912
}
913913

914+
bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
915+
SDValue Op, const TargetLoweringOpt &TLO) const {
916+
// ISel process runs DAGCombiner after legalization; this step is called
917+
// SelectionDAG optimization phase. This post-legalization combining process
918+
// runs DAGCombiner on each node, and if there was a change to be made,
919+
// re-runs legalization again on it and its user nodes to make sure
920+
// everything is in a legalized state.
921+
//
922+
// The legalization calls lowering routines, and we do our custom lowering for
923+
// build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
924+
// into zeros. But there is a set of routines in DAGCombiner that turns unused
925+
// (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
926+
// turns unused vector elements into undefs. But this routine does not work
927+
// with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
928+
// combination can result in an infinite loop, in which undefs are converted to
929+
// zeros in legalization and back to undefs in combining.
930+
//
931+
// So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
932+
// running for build_vectors.
933+
if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
934+
return false;
935+
return true;
936+
}
937+
914938
//===----------------------------------------------------------------------===//
915939
// WebAssembly Lowering private implementation.
916940
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ class WebAssemblyTargetLowering final : public TargetLowering {
113113
report_fatal_error("llvm.clear_cache is not supported on wasm");
114114
}
115115

116+
// Returns false for BUILD_VECTOR nodes once both operations and types are
// legal, so that SimplifyDemandedVectorElts is skipped for them; returns true
// otherwise.
bool
117+
shouldSimplifyDemandedVectorElts(SDValue Op,
118+
const TargetLoweringOpt &TLO) const override;
119+
116120
// Custom lowering hooks.
117121
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
118122
SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; RUN: llc < %s -mattr=+simd128 -verify-machineinstrs
2+
3+
target triple = "wasm32-unknown-unknown"
4+
5+
; After DAG legalization, in SelectionDAG optimization phase, ISel runs
6+
; DAGCombiner on each node, among which SimplifyDemandedVectorElts turns unused
7+
; vector elements into undefs. And in order to make sure the DAG is in a
8+
; legalized state, it runs legalization again, which runs our custom
9+
; LowerBUILD_VECTOR, which converts undefs into zeros, causing an infinite loop.
10+
; We prevent this from happening by creating a custom hook, which allows us to
11+
; bail out of SimplifyDemandedVectorElts after legalization.
12+
13+
; This is a test case reduced from a reported bug reproducer. This should not
14+
; hang.
15+
define void @test(i8 %0) {
16+
%2 = insertelement <4 x i8> <i8 -1, i8 -1, i8 -1, i8 poison>, i8 %0, i64 3
17+
%3 = zext <4 x i8> %2 to <4 x i32>
18+
%4 = mul nuw nsw <4 x i32> %3, <i32 257, i32 257, i32 257, i32 257>
19+
%5 = add nuw nsw <4 x i32> %4, <i32 1, i32 1, i32 1, i32 1>
20+
%6 = lshr <4 x i32> %5, <i32 1, i32 1, i32 1, i32 1>
21+
%7 = mul nuw nsw <4 x i32> %6, <i32 20000, i32 20000, i32 20000, i32 20000>
22+
%8 = add nuw nsw <4 x i32> %7, <i32 32768, i32 32768, i32 32768, i32 32768>
23+
%9 = and <4 x i32> %8, <i32 2147418112, i32 2147418112, i32 2147418112, i32 2147418112>
24+
%10 = sub nsw <4 x i32> <i32 655360000, i32 655360000, i32 655360000, i32 655360000>, %9
25+
%11 = ashr exact <4 x i32> %10, <i32 16, i32 16, i32 16, i32 16>
26+
%12 = trunc <4 x i32> %11 to <4 x i16>
27+
store <4 x i16> %12, <4 x i16>* undef, align 4
28+
ret void
29+
}

0 commit comments

Comments
 (0)