Skip to content

[LoongArch] Add demanded bits support for [X]VMSKLTZ #143528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5634,6 +5634,21 @@ static SDValue performMOVFR2GR_SCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

// DAG combine for [X]VMSKLTZ nodes: run the generic demanded-bits
// simplification over the node with every result bit demanded. The real
// work (dropping sign-extends feeding the mask, zeroing unused elements)
// happens in SimplifyDemandedBitsForTargetNode.
static SDValue performVMSKLTZCombine(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const LoongArchSubtarget &Subtarget) {
  unsigned ResultBits = N->getSimpleValueType(0).getScalarSizeInBits();

  // Simplify the inputs; demand all bits of the scalar result.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnes(ResultBits),
                               DCI))
    return SDValue(N, 0); // Node was updated in place.

  return SDValue();
}

SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand All @@ -5658,6 +5673,9 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
return performMOVGR2FR_WCombine(N, DAG, DCI, Subtarget);
case LoongArchISD::MOVFR2GR_S_LA64:
return performMOVFR2GR_SCombine(N, DAG, DCI, Subtarget);
case LoongArchISD::VMSKLTZ:
case LoongArchISD::XVMSKLTZ:
return performVMSKLTZCombine(N, DAG, DCI, Subtarget);
}
return SDValue();
}
Expand Down Expand Up @@ -8192,3 +8210,58 @@ unsigned LoongArchTargetLowering::getNumRegistersForCallingConv(

return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

// Target hook that lets the generic demanded-bits analysis reason about
// LoongArch-specific nodes. Currently handles [X]VMSKLTZ, which packs the
// sign bit of each source vector element into the low bits of a scalar
// result: result bit i == MSB of element i, and all higher result bits
// are zero.
bool LoongArchTargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default:
    break;
  case LoongArchISD::VMSKLTZ:
  case LoongArchISD::XVMSKLTZ: {
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned SrcBits = SrcVT.getScalarSizeInBits();
    unsigned NumElts = SrcVT.getVectorNumElements();

    // If we don't need the sign bits at all just return zero.
    // (Only bits [0, NumElts) of the result can ever be nonzero.)
    if (OriginalDemandedBits.countr_zero() >= NumElts)
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));

    // Only demand the vector elements of the sign bits we need: demanded
    // result bit i corresponds to demanded source element i.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts);
    if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // Elements known to be zero have a zero sign bit, so the matching
    // result bits are zero; bits above NumElts are always zero.
    Known.Zero = KnownZero.zext(BitWidth);
    Known.Zero.setHighBits(BitWidth - NumElts);

    // [X]VMSKLTZ only uses the MSB from each vector element.
    KnownBits KnownSrc;
    APInt DemandedSrcBits = APInt::getSignMask(SrcBits);
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    // KnownSrc holds bits common to all demanded elements, so a known sign
    // bit determines the low NumElts result bits uniformly.
    if (KnownSrc.One[SrcBits - 1])
      Known.One.setLowBits(NumElts);
    else if (KnownSrc.Zero[SrcBits - 1])
      Known.Zero.setLowBits(NumElts);

    // Attempt to avoid multi-use ops if we don't need anything from it.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
    return false;
  }
  }

  // Anything we don't handle falls back to the generic implementation.
  return TargetLowering::SimplifyDemandedBitsForTargetNode(
      Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
6 changes: 6 additions & 0 deletions llvm/lib/Target/LoongArch/LoongArchISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -314,6 +314,12 @@ class LoongArchTargetLowering : public TargetLowering {
bool isFPImmVLDILegal(const APFloat &Imm, EVT VT) const;
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

/// Let the demanded-bits analysis simplify LoongArch-specific nodes
/// (currently [X]VMSKLTZ, which only reads each element's sign bit).
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts,
                                       KnownBits &Known,
                                       TargetLoweringOpt &TLO,
                                       unsigned Depth) const override;

private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
Expand Down
15 changes: 3 additions & 12 deletions llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -383,9 +383,8 @@ define i8 @xvmsk_eq_vsel_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> %a2)
; CHECK-LABEL: xvmsk_eq_vsel_slt_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvslti.w $xr1, $xr2, 0
; CHECK-NEXT: xvrepli.b $xr2, -1
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr2, $xr0
; CHECK-NEXT: xvrepli.b $xr1, -1
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
; CHECK-NEXT: xvmskltz.w $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
Expand All @@ -408,8 +407,7 @@ define i8 @xvmsk_sel_eq_or_eq_or_slt_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i3
; CHECK-NEXT: xvreplgr2vr.w $xr4, $a0
; CHECK-NEXT: xvand.v $xr2, $xr2, $xr4
; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvslti.w $xr1, $xr3, 0
; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
; CHECK-NEXT: xvor.v $xr0, $xr3, $xr0
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
; CHECK-NEXT: xvmskltz.w $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
Expand Down Expand Up @@ -530,7 +528,6 @@ define i8 @xvmsk_eq_v2i64_concat_poison(<2 x i64> %vec) {
; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
; CHECK-NEXT: vpackev.h $vr0, $vr0, $vr1
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -558,7 +555,6 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
; CHECK-NEXT: st.h $a0, $sp, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand Down Expand Up @@ -586,7 +582,6 @@ define i8 @xvmsk_ogt_v4f64_concat_poison(<4 x double> %vec) {
; CHECK-NEXT: st.h $a0, $sp, 0
; CHECK-NEXT: vld $vr0, $sp, 0
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: addi.d $sp, $sp, 16
Expand All @@ -601,7 +596,6 @@ define i32 @xvmsk_trunc_i8(<32 x i8> %a) {
; CHECK-LABEL: xvmsk_trunc_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvslli.b $xr0, $xr0, 7
; CHECK-NEXT: xvsrai.b $xr0, $xr0, 7
; CHECK-NEXT: xvmskltz.b $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
Expand All @@ -616,7 +610,6 @@ define i16 @xvmsk_trunc_i16(<16 x i16> %a) {
; CHECK-LABEL: xvmsk_trunc_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvslli.h $xr0, $xr0, 15
; CHECK-NEXT: xvsrai.h $xr0, $xr0, 15
; CHECK-NEXT: xvmskltz.h $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
Expand All @@ -631,7 +624,6 @@ define i8 @xvmsk_trunc_i32(<8 x i32> %a) {
; CHECK-LABEL: xvmsk_trunc_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvslli.w $xr0, $xr0, 31
; CHECK-NEXT: xvsrai.w $xr0, $xr0, 31
; CHECK-NEXT: xvmskltz.w $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
Expand All @@ -646,7 +638,6 @@ define i4 @xvmsk_trunc_i64(<4 x i64> %a) {
; CHECK-LABEL: xvmsk_trunc_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvslli.d $xr0, $xr0, 63
; CHECK-NEXT: xvsrai.d $xr0, $xr0, 63
; CHECK-NEXT: xvmskltz.d $xr0, $xr0
; CHECK-NEXT: xvpickve2gr.wu $a0, $xr0, 0
; CHECK-NEXT: xvpickve2gr.wu $a1, $xr0, 4
Expand Down
16 changes: 0 additions & 16 deletions llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,6 @@ define i2 @vmsk_sgt_v2i8(<2 x i8> %a, <2 x i8> %b) {
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.d $vr0, $vr0, 56
; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -197,7 +196,6 @@ define i2 @vmsk_sgt_v2i16(<2 x i16> %a, <2 x i16> %b) {
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.d $vr0, $vr0, 48
; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -212,7 +210,6 @@ define i2 @vmsk_sgt_v2i32(<2 x i32> %a, <2 x i32> %b) {
; CHECK-NEXT: vslt.w $vr0, $vr1, $vr0
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
; CHECK-NEXT: vslli.d $vr0, $vr0, 32
; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -252,7 +249,6 @@ define i4 @vmsk_sgt_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.w $vr0, $vr0, 24
; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -267,7 +263,6 @@ define i4 @vmsk_sgt_v4i16(<4 x i16> %a, <4 x i16> %b) {
; CHECK-NEXT: vslt.h $vr0, $vr1, $vr0
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.w $vr0, $vr0, 16
; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -306,7 +301,6 @@ define i8 @vmsk_sgt_v8i8(<8 x i8> %a, <8 x i8> %b) {
; CHECK-NEXT: vslt.b $vr0, $vr1, $vr0
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.h $vr0, $vr0, 8
; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -349,7 +343,6 @@ define i2 @vmsk_sgt_and_sgt_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8>
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.d $vr0, $vr0, 56
; CHECK-NEXT: vsrai.d $vr0, $vr0, 56
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -369,7 +362,6 @@ define i2 @vmsk_sgt_and_sgt_v2i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.w $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.d $vr0, $vr0, 48
; CHECK-NEXT: vsrai.d $vr0, $vr0, 48
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -388,7 +380,6 @@ define i2 @vmsk_sgt_and_sgt_v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 16
; CHECK-NEXT: vslli.d $vr0, $vr0, 32
; CHECK-NEXT: vsrai.d $vr0, $vr0, 32
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -440,7 +431,6 @@ define i4 @vmsk_sgt_and_sgt_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.w $vr0, $vr0, 24
; CHECK-NEXT: vsrai.w $vr0, $vr0, 24
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -459,7 +449,6 @@ define i4 @vmsk_sgt_and_sgt_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vilvl.h $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.w $vr0, $vr0, 16
; CHECK-NEXT: vsrai.w $vr0, $vr0, 16
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -510,7 +499,6 @@ define i8 @vmsk_sgt_and_sgt_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8>
; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
; CHECK-NEXT: vilvl.b $vr0, $vr0, $vr0
; CHECK-NEXT: vslli.h $vr0, $vr0, 8
; CHECK-NEXT: vsrai.h $vr0, $vr0, 8
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down Expand Up @@ -557,7 +545,6 @@ define i16 @vmsk_trunc_i8(<16 x i8> %a) {
; CHECK-LABEL: vmsk_trunc_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vslli.b $vr0, $vr0, 7
; CHECK-NEXT: vsrai.b $vr0, $vr0, 7
; CHECK-NEXT: vmskltz.b $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -570,7 +557,6 @@ define i8 @vmsk_trunc_i16(<8 x i16> %a) {
; CHECK-LABEL: vmsk_trunc_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vslli.h $vr0, $vr0, 15
; CHECK-NEXT: vsrai.h $vr0, $vr0, 15
; CHECK-NEXT: vmskltz.h $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -583,7 +569,6 @@ define i4 @vmsk_trunc_i32(<4 x i32> %a) {
; CHECK-LABEL: vmsk_trunc_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vslli.w $vr0, $vr0, 31
; CHECK-NEXT: vsrai.w $vr0, $vr0, 31
; CHECK-NEXT: vmskltz.w $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand All @@ -596,7 +581,6 @@ define i2 @vmsk_trunc_i64(<2 x i64> %a) {
; CHECK-LABEL: vmsk_trunc_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vslli.d $vr0, $vr0, 63
; CHECK-NEXT: vsrai.d $vr0, $vr0, 63
; CHECK-NEXT: vmskltz.d $vr0, $vr0
; CHECK-NEXT: vpickve2gr.hu $a0, $vr0, 0
; CHECK-NEXT: ret
Expand Down
Loading