Skip to content

Commit e4b2842

Browse files
authored
[SelectionDAG] Handle VSCALE in isKnownNeverZero (#97789)
VSCALE is by definition greater than zero, but this patch still derives that fact from getVScaleRange rather than returning true unconditionally. The motivation is #97394, which needs to check whether the EVL of a VP strided load is non-zero. I added the tests to the RISC-V backend since the existing X86 known-never-zero.ll test crashed when trying to lower vscale for the +sse2 RUN line.
1 parent b3fa2a6 commit e4b2842

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5623,6 +5623,15 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
5623 5623
case ISD::ZERO_EXTEND:
5624 5624
case ISD::SIGN_EXTEND:
5625 5625
return isKnownNeverZero(Op.getOperand(0), Depth + 1);
5626+
case ISD::VSCALE: {
5627+
const Function &F = getMachineFunction().getFunction();
5628+
const APInt &Multiplier = Op.getConstantOperandAPInt(0);
5629+
ConstantRange CR =
5630+
getVScaleRange(&F, Op.getScalarValueSizeInBits()).multiply(Multiplier);
5631+
if (!CR.getUnsignedMin().isZero())
5632+
return true;
5633+
break;
5634+
}
5626 5635
}
5627 5636

5628 5637
return computeKnownBits(Op, Depth).isNonZero();
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
4+
; Use cttz to test if we properly prove never-zero. There is a very
5+
; simple transform from cttz -> cttz_zero_undef if its operand is
6+
; known never zero.
7+
8+
; Even without vscale_range, vscale is always guaranteed to be non-zero.
9+
define i32 @vscale_known_nonzero() {
10+
; CHECK-LABEL: vscale_known_nonzero:
11+
; CHECK: # %bb.0:
12+
; CHECK-NEXT: addi sp, sp, -16
13+
; CHECK-NEXT: .cfi_def_cfa_offset 16
14+
; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
15+
; CHECK-NEXT: .cfi_offset ra, -8
16+
; CHECK-NEXT: csrr a0, vlenb
17+
; CHECK-NEXT: srli a0, a0, 3
18+
; CHECK-NEXT: neg a1, a0
19+
; CHECK-NEXT: and a0, a0, a1
20+
; CHECK-NEXT: lui a1, 30667
21+
; CHECK-NEXT: addiw a1, a1, 1329
22+
; CHECK-NEXT: call __muldi3
23+
; CHECK-NEXT: srliw a0, a0, 27
24+
; CHECK-NEXT: lui a1, %hi(.LCPI0_0)
25+
; CHECK-NEXT: addi a1, a1, %lo(.LCPI0_0)
26+
; CHECK-NEXT: add a0, a1, a0
27+
; CHECK-NEXT: lbu a0, 0(a0)
28+
; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
29+
; CHECK-NEXT: addi sp, sp, 16
30+
; CHECK-NEXT: ret
31+
%x = call i32 @llvm.vscale()
32+
%r = call i32 @llvm.cttz.i32(i32 %x, i1 false)
33+
ret i32 %r
34+
}

0 commit comments

Comments
 (0)