Skip to content

[PowerPC] Use setbc for values from vector compare conditions #114858

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 45 additions & 17 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1858,6 +1858,10 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
case PPCISD::STORE_COND:
return "PPCISD::STORE_COND";
case PPCISD::SETBC:
return "PPCISD::SETBC";
case PPCISD::SETBCR:
return "PPCISD::SETBCR";
}
return nullptr;
}
Expand Down Expand Up @@ -11264,31 +11268,55 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));

// Unpack the result based on how the target uses it.
unsigned BitNo; // Bit # of CR6.
bool InvertBit; // Invert result?
unsigned BitNo; // Bit # of CR6.
bool InvertBit; // Invert result?
unsigned Bitx;
unsigned SetOp;
switch (Op.getConstantOperandVal(1)) {
default: // Can't happen, don't crash on invalid number though.
case 0: // Return the value of the EQ bit of CR6.
BitNo = 0; InvertBit = false;
default: // Can't happen, don't crash on invalid number though.
case 0: // Return the value of the EQ bit of CR6.
BitNo = 0;
InvertBit = false;
Bitx = PPC::sub_eq;
SetOp = PPCISD::SETBC;
break;
case 1: // Return the inverted value of the EQ bit of CR6.
BitNo = 0; InvertBit = true;
case 1: // Return the inverted value of the EQ bit of CR6.
BitNo = 0;
InvertBit = true;
Bitx = PPC::sub_eq;
SetOp = PPCISD::SETBCR;
break;
case 2: // Return the value of the LT bit of CR6.
BitNo = 2; InvertBit = false;
case 2: // Return the value of the LT bit of CR6.
BitNo = 2;
InvertBit = false;
Bitx = PPC::sub_lt;
SetOp = PPCISD::SETBC;
break;
case 3: // Return the inverted value of the LT bit of CR6.
BitNo = 2; InvertBit = true;
case 3: // Return the inverted value of the LT bit of CR6.
BitNo = 2;
InvertBit = true;
Bitx = PPC::sub_lt;
SetOp = PPCISD::SETBCR;
break;
}

SDValue GlueOp = CompNode.getValue(1);
if (Subtarget.isISA3_1()) {
SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
SDValue CRBit =
SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
CR6Reg, SubRegIdx, GlueOp),
0);
return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
}

// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);

// Shift the bit into the low position.
Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,12 @@ namespace llvm {
/// Constrained floating point add in round-to-zero mode.
STRICT_FADDRTZ,

/// SETBC - The ISA 3.1 (P10) SETBC instruction.
SETBC,

/// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
SETBCR,

// NOTE: The nodes below may require PC-Rel specific patterns if the
// address could be PC-Relative. When adding new nodes below, consider
// whether or not the address can be PC-Relative and add the corresponding
Expand Down
12 changes: 10 additions & 2 deletions llvm/lib/Target/PowerPC/PPCInstrP10.td
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ def SDT_PPCxxmfacc : SDTypeProfile<1, 1, [
SDTCisVT<0, v512i1>, SDTCisVT<1, v512i1>
]>;

// Type profile used by the PPCsetbc and PPCsetbcr nodes: one result and one
// operand, each constrained to an integer type.
def SDT_PPCsetbc : SDTypeProfile<1, 1, [
SDTCisInt<0>, SDTCisInt<1>
]>;

//===----------------------------------------------------------------------===//
// ISA 3.1 specific PPCISD nodes.
//
Expand All @@ -91,6 +95,8 @@ def PPCAccExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCAccExtractVsx,
def PPCPairExtractVsx : SDNode<"PPCISD::EXTRACT_VSX_REG", SDT_PPCPairExtractVsx,
[]>;
def PPCxxmfacc : SDNode<"PPCISD::XXMFACC", SDT_PPCxxmfacc, []>;
def PPCsetbc : SDNode<"PPCISD::SETBC", SDT_PPCsetbc, []>;
def PPCsetbcr : SDNode<"PPCISD::SETBCR", SDT_PPCsetbc, []>;

//===----------------------------------------------------------------------===//

Expand Down Expand Up @@ -1397,10 +1403,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P

let Predicates = [IsISA3_1] in {
def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
"setbc $RST, $BI", IIC_IntCompare, []>,
"setbc $RST, $BI", IIC_IntCompare,
[(set i32:$RST, (PPCsetbc i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
"setbcr $RST, $BI", IIC_IntCompare, []>,
"setbcr $RST, $BI", IIC_IntCompare,
[(set i32:$RST, (PPCsetbcr i1:$BI))]>,
SExt32To64, ZExt32To64;
def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
"setnbc $RST, $BI", IIC_IntCompare, []>,
Expand Down
15 changes: 15 additions & 0 deletions llvm/test/CodeGen/PowerPC/vcmp-setbc-quad.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s

; Quadword case: the predicate form of vcmpgtuq with selector 2 is expected to
; be emitted as the record-form compare followed by a single setbc that reads
; the lt bit of cr6.
; Note that the intrinsic is called with (%b, %a), i.e. swapped relative to the
; function's parameter order; the expected compare operands (v3, v2) reflect
; that.
define range(i64 -2147483648, 2147483648) i64 @cmpgt(<1 x i128> noundef %a, <1 x i128> noundef %b) local_unnamed_addr {
; CHECK: vcmpgtuq. v2, v3, v2
; CHECK: setbc r3, 4*cr6+lt
entry:
%0 = tail call i32 @llvm.ppc.altivec.vcmpgtuq.p(i32 2, <1 x i128> %b, <1 x i128> %a)
; The i32 predicate result is sign-extended to match the i64 return type.
%conv = sext i32 %0 to i64
ret i64 %conv
}

declare i32 @llvm.ppc.altivec.vcmpgtuq.p(i32, <1 x i128>, <1 x i128>)
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/PowerPC/vcmp-setbc.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64-ibm-aix -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr10 -mtriple=powerpc-ibm-aix -ppc-asm-full-reg-names \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s

; Selector 2 on the word signed greater-than predicate intrinsic: expects the
; record-form vcmpgtsw. followed by setbc on the lt bit of cr6 (non-inverted).
define signext i32 @cmpgtw(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
; CHECK: vcmpgtsw. v2, v2, v3
; CHECK: setbc r3, 4*cr6+lt
entry:
%0 = tail call i32 @llvm.ppc.altivec.vcmpgtsw.p(i32 2, <4 x i32> %a, <4 x i32> %b)
ret i32 %0
}

; Selector 3 on the word equality predicate intrinsic: expects the record-form
; vcmpequw. followed by setbcr (the reversed form) on the lt bit of cr6.
; NOTE(review): the name presumably stands for "any not-equal, word" - confirm.
define signext i32 @cmpanynew(<4 x i32> noundef %a, <4 x i32> noundef %b) local_unnamed_addr {
; CHECK: vcmpequw. v2, v2, v3
; CHECK: setbcr r3, 4*cr6+lt
entry:
%0 = tail call i32 @llvm.ppc.altivec.vcmpequw.p(i32 3, <4 x i32> %a, <4 x i32> %b)
ret i32 %0
}

; Selector 0 on the halfword equality predicate intrinsic: expects the
; record-form vcmpequh. followed by setbc on the eq bit of cr6 (non-inverted).
; NOTE(review): the name presumably stands for "all not-equal, halfword" -
; confirm.
define signext i32 @cmpallneh(<8 x i16> noundef %a, <8 x i16> noundef %b) local_unnamed_addr {
; CHECK: vcmpequh. v2, v2, v3
; CHECK: setbc r3, 4*cr6+eq
entry:
%0 = tail call i32 @llvm.ppc.altivec.vcmpequh.p(i32 0, <8 x i16> %a, <8 x i16> %b)
ret i32 %0
}

; Selector 1 on the byte equality predicate intrinsic: expects the record-form
; vcmpequb. followed by setbcr (the reversed form) on the eq bit of cr6.
define signext i32 @cmpeqb(<16 x i8> noundef %a, <16 x i8> noundef %b) local_unnamed_addr {
; CHECK: vcmpequb. v2, v2, v3
; CHECK: setbcr r3, 4*cr6+eq
entry:
%0 = tail call i32 @llvm.ppc.altivec.vcmpequb.p(i32 1, <16 x i8> %a, <16 x i8> %b)
ret i32 %0
}

declare i32 @llvm.ppc.altivec.vcmpgtsw.p(i32, <4 x i32>, <4 x i32>)

declare i32 @llvm.ppc.altivec.vcmpequw.p(i32, <4 x i32>, <4 x i32>)

declare i32 @llvm.ppc.altivec.vcmpequh.p(i32, <8 x i16>, <8 x i16>)

declare i32 @llvm.ppc.altivec.vcmpequb.p(i32, <16 x i8>, <16 x i8>)
Loading