@@ -15758,16 +15758,12 @@ void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
15758
15758
case Intrinsic::amdgcn_mbcnt_hi: {
15759
15759
const GCNSubtarget &ST =
15760
15760
DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
15761
- // These return at most the (wavefront size - 1) + src1
15762
- // As long as src1 is an immediate we can calc known bits
15763
- KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
15764
- unsigned Src1ValBits = Src1Known.countMaxActiveBits();
15765
- unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
15766
- // Cater for potential carry
15767
- MaxActiveBits += Src1ValBits ? 1 : 0;
15768
- unsigned Size = Op.getValueType().getSizeInBits();
15769
- if (MaxActiveBits < Size)
15770
- Known.Zero.setHighBits(Size - MaxActiveBits);
15761
+ // Wave64 mbcnt_lo returns at most 32 + src1. Otherwise these return at
15762
+ // most 31 + src1.
15763
+ Known.Zero.setBitsFrom(
15764
+ IID == Intrinsic::amdgcn_mbcnt_lo ? ST.getWavefrontSizeLog2() : 5);
15765
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
15766
+ Known = KnownBits::add(Known, Known2);
15771
15767
return;
15772
15768
}
15773
15769
}
@@ -15802,7 +15798,8 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
15802
15798
switch (MI->getOpcode()) {
15803
15799
case AMDGPU::G_INTRINSIC:
15804
15800
case AMDGPU::G_INTRINSIC_CONVERGENT: {
15805
- switch (cast<GIntrinsic>(MI)->getIntrinsicID()) {
15801
+ Intrinsic::ID IID = cast<GIntrinsic>(MI)->getIntrinsicID();
15802
+ switch (IID) {
15806
15803
case Intrinsic::amdgcn_workitem_id_x:
15807
15804
knownBitsForWorkitemID(*getSubtarget(), KB, Known, 0);
15808
15805
break;
@@ -15814,9 +15811,15 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
15814
15811
break;
15815
15812
case Intrinsic::amdgcn_mbcnt_lo:
15816
15813
case Intrinsic::amdgcn_mbcnt_hi: {
15817
- // These return at most the wavefront size - 1.
15818
- unsigned Size = MRI.getType(R).getSizeInBits();
15819
- Known.Zero.setHighBits(Size - getSubtarget()->getWavefrontSizeLog2());
15814
+ // Wave64 mbcnt_lo returns at most 32 + src1. Otherwise these return at
15815
+ // most 31 + src1.
15816
+ Known.Zero.setBitsFrom(IID == Intrinsic::amdgcn_mbcnt_lo
15817
+ ? getSubtarget()->getWavefrontSizeLog2()
15818
+ : 5);
15819
+ KnownBits Known2;
15820
+ KB.computeKnownBitsImpl(MI->getOperand(3).getReg(), Known2, DemandedElts,
15821
+ Depth + 1);
15822
+ Known = KnownBits::add(Known, Known2);
15820
15823
break;
15821
15824
}
15822
15825
case Intrinsic::amdgcn_groupstaticsize: {
0 commit comments