Skip to content

Commit ece66db

Browse files
lukel97 and preames authored
[SelectionDAG] Add computeKnownBits support for ISD::STEP_VECTOR (#80452)
This handles two cases where we can work out some known-zero bits for ISD::STEP_VECTOR.

The first case handles when we know the low bits are zero because the step amount is a power of two. This is taken from https://reviews.llvm.org/D128159, and even though the original patch didn't end up landing this case due to it not having any test difference, I've included it here for completeness' sake.

The second case handles the case when we have an upper bound on vscale_range. We can use this to work out the upper bound on the number of elements, and thus what the maximum step will be. From the maximum step we then know which high bits are zero.

On its own, computing the known high bits results in some small improvements for RVV with -mrvv-vector-bits=zvl across the llvm-test-suite. However, I'm hoping to be able to use this later to reduce the LMUL in index calculations for vrgather/indexed accesses.

---------

Co-authored-by: Philip Reames <[email protected]>
1 parent 0d7f232 commit ece66db

File tree

2 files changed

+69
-0
lines changed

2 files changed

+69
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3110,6 +3110,33 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
31103110
}
31113111
break;
31123112
}
3113+
case ISD::STEP_VECTOR: {
3114+
const APInt &Step = Op.getConstantOperandAPInt(0);
3115+
3116+
if (Step.isPowerOf2())
3117+
Known.Zero.setLowBits(Step.logBase2());
3118+
3119+
const Function &F = getMachineFunction().getFunction();
3120+
3121+
if (!isUIntN(BitWidth, Op.getValueType().getVectorMinNumElements()))
3122+
break;
3123+
const APInt MinNumElts =
3124+
APInt(BitWidth, Op.getValueType().getVectorMinNumElements());
3125+
3126+
bool Overflow;
3127+
const APInt MaxNumElts = getVScaleRange(&F, BitWidth)
3128+
.getUnsignedMax()
3129+
.umul_ov(MinNumElts, Overflow);
3130+
if (Overflow)
3131+
break;
3132+
3133+
const APInt MaxValue = (MaxNumElts - 1).umul_ov(Step, Overflow);
3134+
if (Overflow)
3135+
break;
3136+
3137+
Known.Zero.setHighBits(MaxValue.countl_zero());
3138+
break;
3139+
}
31133140
case ISD::BUILD_VECTOR:
31143141
assert(!Op.getValueType().isScalableVector());
31153142
// Collect the known bits that are shared by every demanded vector element.

llvm/test/CodeGen/RISCV/rvv/stepvector.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,3 +733,45 @@ entry:
733733
%3 = shl <vscale x 16 x i64> %2, %1
734734
ret <vscale x 16 x i64> %3
735735
}
736+
737+
; maximum number of elements is 4 * 2 = 8, so maximum step value is 7, so hi 61 bits are known
738+
; zero
739+
define <vscale x 2 x i64> @hi_bits_known_zero() vscale_range(2, 4) {
740+
; CHECK-LABEL: hi_bits_known_zero:
741+
; CHECK: # %bb.0:
742+
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
743+
; CHECK-NEXT: vmv.v.i v8, 0
744+
; CHECK-NEXT: ret
745+
%step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
746+
%and = and <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xfffffffffffffff8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
747+
ret <vscale x 2 x i64> %and
748+
}
749+
750+
; the maximum step here overflows so don't set the known hi bits
751+
define <vscale x 2 x i64> @hi_bits_known_zero_overflow() vscale_range(2, 4) {
752+
; CHECK-LABEL: hi_bits_known_zero_overflow:
753+
; CHECK: # %bb.0:
754+
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
755+
; CHECK-NEXT: vid.v v8
756+
; CHECK-NEXT: li a0, -1
757+
; CHECK-NEXT: vmul.vx v8, v8, a0
758+
; CHECK-NEXT: vand.vi v8, v8, -8
759+
; CHECK-NEXT: ret
760+
%step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
761+
%step.mul = mul <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xffffffffffffffff, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
762+
%and = and <vscale x 2 x i64> %step.mul, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 u0xfffffffffffffff8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
763+
ret <vscale x 2 x i64> %and
764+
}
765+
766+
; step values are multiples of 8, so lo 3 bits are known zero
767+
define <vscale x 2 x i64> @lo_bits_known_zero() {
768+
; CHECK-LABEL: lo_bits_known_zero:
769+
; CHECK: # %bb.0:
770+
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
771+
; CHECK-NEXT: vmv.v.i v8, 0
772+
; CHECK-NEXT: ret
773+
%step = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
774+
%step.mul = mul <vscale x 2 x i64> %step, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 8, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
775+
%and = and <vscale x 2 x i64> %step.mul, shufflevector(<vscale x 2 x i64> insertelement(<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
776+
ret <vscale x 2 x i64> %and
777+
}

0 commit comments

Comments
 (0)