Skip to content

Commit b95ec24

Browse files
authored
[SDAG] Handle insert_subvector in isKnownNeverNaN (#131989)
Propagate nnan across insert_subvector.
1 parent f0a59c4 commit b95ec24

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5752,6 +5752,34 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, const APInt &DemandedElts,
57525752
}
57535753
return isKnownNeverNaN(Src, SNaN, Depth + 1);
57545754
}
5755+
case ISD::INSERT_SUBVECTOR: {
5756+
SDValue BaseVector = Op.getOperand(0);
5757+
SDValue SubVector = Op.getOperand(1);
5758+
EVT BaseVectorVT = BaseVector.getValueType();
5759+
if (BaseVectorVT.isFixedLengthVector()) {
5760+
unsigned Idx = Op.getConstantOperandVal(2);
5761+
unsigned NumBaseElts = BaseVectorVT.getVectorNumElements();
5762+
unsigned NumSubElts = SubVector.getValueType().getVectorNumElements();
5763+
5764+
// Split DemandedElts by origin: lanes [Idx, Idx + NumSubElts) are taken from
5765+
// the inserted subvector; every other lane still comes from the base vector.
5766+
APInt DemandedMask =
5767+
APInt::getBitsSet(NumBaseElts, Idx, Idx + NumSubElts);
5768+
APInt DemandedSrcElts = DemandedElts & ~DemandedMask;
5769+
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
5770+
5771+
bool NeverNaN = true;
5772+
if (!DemandedSrcElts.isZero())
5773+
NeverNaN &=
5774+
isKnownNeverNaN(BaseVector, DemandedSrcElts, SNaN, Depth + 1);
5775+
if (NeverNaN && !DemandedSubElts.isZero())
5776+
NeverNaN &=
5777+
isKnownNeverNaN(SubVector, DemandedSubElts, SNaN, Depth + 1);
5778+
return NeverNaN;
5779+
}
5780+
return isKnownNeverNaN(BaseVector, SNaN, Depth + 1) &&
5781+
isKnownNeverNaN(SubVector, SNaN, Depth + 1);
5782+
}
57555783
case ISD::BUILD_VECTOR: {
57565784
unsigned NumElts = Op.getNumOperands();
57575785
for (unsigned I = 0; I != NumElts; ++I)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,3 +357,52 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
357357
%v = call <2 x half> @llvm.maximum.v2f16(<2 x half> %a, <2 x half> %c)
358358
ret <2 x half> %v
359359
}
360+
361+
declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
362+
363+
define <4 x half> @vfmax_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
364+
; ZVFH-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
365+
; ZVFH: # %bb.0:
366+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
367+
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368+
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369+
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370+
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
372+
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
373+
; ZVFH-NEXT: vfmax.vv v8, v8, v10
374+
; ZVFH-NEXT: ret
375+
;
376+
; ZVFHMIN-LABEL: vfmax_v2f16_vv_nnan_insert_subvector:
377+
; ZVFHMIN: # %bb.0:
378+
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
379+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
380+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
381+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
382+
; ZVFHMIN-NEXT: vfadd.vv v9, v11, v11
383+
; ZVFHMIN-NEXT: vfadd.vv v8, v8, v8
384+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
385+
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
386+
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
387+
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
388+
; ZVFHMIN-NEXT: vslideup.vi v11, v9, 2
389+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
390+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
391+
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
392+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
393+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
394+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
395+
; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
396+
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
397+
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
398+
; ZVFHMIN-NEXT: vfmax.vv v9, v8, v10
399+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
400+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
401+
; ZVFHMIN-NEXT: ret
402+
%d = fadd nnan <2 x half> %a, %a
403+
%e = fadd nnan <2 x half> %b, %b
404+
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
405+
%g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
406+
%v = call <4 x half> @llvm.maximum.v4f16(<4 x half> %g, <4 x half> %c)
407+
ret <4 x half> %v
408+
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,3 +357,52 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) {
357357
%v = call <2 x half> @llvm.minimum.v2f16(<2 x half> %a, <2 x half> %c)
358358
ret <2 x half> %v
359359
}
360+
361+
declare <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half>, <2 x half>, i64)
362+
363+
define <4 x half> @vfmin_v2f16_vv_nnan_insert_subvector(<2 x half> %a, <2 x half> %b, <4 x half> %c) {
364+
; ZVFH-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
365+
; ZVFH: # %bb.0:
366+
; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
367+
; ZVFH-NEXT: vfadd.vv v8, v8, v8
368+
; ZVFH-NEXT: vfadd.vv v9, v9, v9
369+
; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
370+
; ZVFH-NEXT: vmfeq.vv v0, v10, v10
371+
; ZVFH-NEXT: vslideup.vi v8, v9, 2
372+
; ZVFH-NEXT: vmerge.vvm v8, v10, v8, v0
373+
; ZVFH-NEXT: vfmin.vv v8, v8, v10
374+
; ZVFH-NEXT: ret
375+
;
376+
; ZVFHMIN-LABEL: vfmin_v2f16_vv_nnan_insert_subvector:
377+
; ZVFHMIN: # %bb.0:
378+
; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
379+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v11, v8
380+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9
381+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
382+
; ZVFHMIN-NEXT: vfadd.vv v9, v11, v11
383+
; ZVFHMIN-NEXT: vfadd.vv v8, v8, v8
384+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
385+
; ZVFHMIN-NEXT: vfncvt.f.f.w v11, v9
386+
; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
387+
; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
388+
; ZVFHMIN-NEXT: vslideup.vi v11, v9, 2
389+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v11
390+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
391+
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
392+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
393+
; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v10
394+
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
395+
; ZVFHMIN-NEXT: vmerge.vvm v10, v8, v9, v0
396+
; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9
397+
; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v8, v0
398+
; ZVFHMIN-NEXT: vfmin.vv v9, v8, v10
399+
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
400+
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9
401+
; ZVFHMIN-NEXT: ret
402+
%d = fadd nnan <2 x half> %a, %a
403+
%e = fadd nnan <2 x half> %b, %b
404+
%f = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> undef, <2 x half> %d, i64 0)
405+
%g = call <4 x half> @llvm.vector.insert.v2f32.v4f32(<4 x half> %f, <2 x half> %e, i64 2)
406+
%v = call <4 x half> @llvm.minimum.v4f16(<4 x half> %g, <4 x half> %c)
407+
ret <4 x half> %v
408+
}

0 commit comments

Comments
 (0)