Skip to content

[ARM] Stop gluing 1-bit shifts #116547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2590,14 +2590,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}

case ARM::MOVsrl_glue:
case ARM::MOVsra_glue: {
case ARM::LSRs1:
case ARM::ASRs1: {
// These are just fancy MOVs instructions.
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.add(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(
(Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1))
(Opcode == ARM::LSRs1 ? ARM_AM::lsr : ARM_AM::asr), 1))
.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
Expand Down
17 changes: 10 additions & 7 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
cl::init(2));

/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
constexpr MVT FlagsVT = MVT::i32;

// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
Expand Down Expand Up @@ -1730,14 +1733,14 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
MAKE_CASE(ARMISD::SRL_GLUE)
MAKE_CASE(ARMISD::SRA_GLUE)
MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::LSRS1)
MAKE_CASE(ARMISD::ASRS1)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
MAKE_CASE(ARMISD::SUBC)
MAKE_CASE(ARMISD::SUBE)
MAKE_CASE(ARMISD::LSLS)
MAKE_CASE(ARMISD::VMOVRRD)
MAKE_CASE(ARMISD::VMOVDRR)
MAKE_CASE(ARMISD::VMOVhr)
Expand Down Expand Up @@ -6847,10 +6850,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);

// First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// First, build a LSRS1/ASRS1 op, which shifts the top part by one and
// captures the shifted out bit into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::LSRS1 : ARMISD::ASRS1;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);

// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/ARM/ARMISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,15 +101,15 @@ class VectorType;

BCC_i64,

SRL_GLUE, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
SRA_GLUE, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
LSLS, // Flag-setting shift left.
LSRS1, // Flag-setting logical shift right by one bit.
ASRS1, // Flag-setting arithmetic shift right by one bit.
RRX, // Shift right one bit with carry in.

ADDC, // Add with carry
ADDE, // Add using carry
SUBC, // Sub with carry
SUBE, // Sub using carry
LSLS, // Shift left producing carry

VMOVRRD, // double to two gprs.
VMOVDRR, // Two gprs to double.
Expand Down
42 changes: 27 additions & 15 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// ARM specific DAG Nodes.
//

/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
defvar FlagsVT = i32;

// Type profiles.
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
SDTCisVT<1, i32> ]>;
Expand Down Expand Up @@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
SDTCisVT<4, i32>]>;

// Type profile for a unary integer node that additionally produces a flags
// value: two results (an integer value and a FlagsVT-typed flags result) and
// one operand, whose type must match the integer result. Constraint indices
// count results first, so 0 and 1 are the results and 2 is the operand.
def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
SDTCisInt<0>, // result
SDTCisVT<1, FlagsVT>, // out flags
SDTCisSameAs<2, 0> // operand
]>;

// Type profile for a unary integer node that additionally consumes a flags
// value: one integer result and two operands (the integer input, whose type
// must match the result, followed by a FlagsVT-typed flags input).
// Constraint indices count results first, so 0 is the result, 1 is the
// integer operand and 2 is the flags operand.
def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [
  SDTCisInt<0>,         // result
  SDTCisSameAs<1, 0>,   // operand
  SDTCisVT<2, FlagsVT>  // in flags (index 2; index 1 is the integer operand)
]>;

def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
[SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
Expand Down Expand Up @@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;

def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;

def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
Expand Down Expand Up @@ -3730,20 +3745,17 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
Requires<[IsARM, HasV6T2]>;

let Uses = [CPSR] in
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
[(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
Requires<[IsARM]>, Sched<[WriteALU]>;

// These aren't really mov instructions, but we have to define them this way
// due to glue operands.
def RRX : PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
[(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;

let Defs = [CPSR] in {
def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
Sched<[WriteALU]>, Requires<[IsARM]>;
def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
Sched<[WriteALU]>, Requires<[IsARM]>;
def LSRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, CPSR, (ARMlsrs1 GPR:$src))]>,
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
def ASRs1 : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
[(set GPR:$dst, CPSR, (ARMasrs1 GPR:$src))]>,
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
}

//===----------------------------------------------------------------------===//
Expand Down
25 changes: 13 additions & 12 deletions llvm/lib/Target/ARM/ARMInstrThumb2.td
Original file line number Diff line number Diff line change
Expand Up @@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),

let Uses = [CPSR] in {
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"rrx", "\t$Rd, $Rm",
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
"rrx", "\t$Rd, $Rm",
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
Expand All @@ -2800,12 +2801,13 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
}

// These differ from t2LSRri / t2ASRri in that they are flag-setting
// and have a hardcoded shift amount = 1.
let isCodeGenOnly = 1, Defs = [CPSR] in {
def t2MOVsrl_glue : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
Sched<[WriteALU]> {
def t2LSRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, CPSR, (ARMlsrs1 rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
Expand All @@ -2816,11 +2818,10 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
def t2MOVsra_glue : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
Sched<[WriteALU]> {
def t2ASRs1 : T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
[(set rGPR:$Rd, CPSR, (ARMasrs1 rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMScheduleM7.td
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ def M7Ex1ReadNoFastBypass : SchedReadAdvance<-1, [WriteLd, M7LoadLatency1]>;
def : InstRW<[WriteALUsi, M7Ex1ReadNoFastBypass, M7Read_ISS],
(instregex "t2(ADC|ADDS|ADD|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|SUBS)rs$",
"t2(SUB|CMP|CMNz|TEQ|TST)rs$",
"t2MOVsr(a|l)")>;
"t2(A|L)SRs1$")>;
def : InstRW<[WriteALUsi, M7Read_ISS],
(instregex "t2MVNs")>;

Expand All @@ -335,7 +335,7 @@ def : InstRW<[WriteALUsi, M7Read_ISS],
// but the results prove to be better than trying to get them exact.

def : InstRW<[M7WriteShift2, M7Read_ISS], (instregex "t2RRX$")>;
def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)")>;
def : InstRW<[WriteALUsi], (instregex "(t|t2)(LSL|LSR|ASR|ROR)r", "tROR")>;

// Instructions that use the shifter, but have normal timing.

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/ARM/ARMScheduleM85.td
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,7 @@ def : InstRW<[M85WriteALUsi, M85ReadALUsi],
def : InstRW<[M85WriteShift2],
(instregex "t2RRX$")>;
def : InstRW<[WriteALU],
(instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)", "t2MOVsr(a|l)")>;
(instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)")>;

// Instructions that use the shifter, but have normal timing

Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll
Original file line number Diff line number Diff line change
Expand Up @@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM5-NEXT: mla r0, r1, r12, r4
; ARM5-NEXT: bic r0, r0, #-2147483648
; ARM5-NEXT: lsrs r0, r0, #1
; ARM5-NEXT: rrx r1, r3
; ARM5-NEXT: rrx r2, r3
; ARM5-NEXT: orr r0, r0, r3, lsl #30
; ARM5-NEXT: ldr r3, .LCPI5_2
; ARM5-NEXT: bic r2, r0, #-2147483648
; ARM5-NEXT: bic r1, r0, #-2147483648
; ARM5-NEXT: mov r0, #0
; ARM5-NEXT: subs r1, r1, r3
; ARM5-NEXT: sbcs r1, r2, #1
; ARM5-NEXT: subs r2, r2, r3
; ARM5-NEXT: sbcs r1, r1, #1
; ARM5-NEXT: movlo r0, #1
; ARM5-NEXT: pop {r4, pc}
; ARM5-NEXT: .p2align 2
Expand All @@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM6-NEXT: mla r0, r1, r12, r0
; ARM6-NEXT: bic r0, r0, #-2147483648
; ARM6-NEXT: lsrs r0, r0, #1
; ARM6-NEXT: rrx r1, r3
; ARM6-NEXT: rrx r2, r3
; ARM6-NEXT: orr r0, r0, r3, lsl #30
; ARM6-NEXT: ldr r3, .LCPI5_2
; ARM6-NEXT: bic r2, r0, #-2147483648
; ARM6-NEXT: bic r1, r0, #-2147483648
; ARM6-NEXT: mov r0, #0
; ARM6-NEXT: subs r1, r1, r3
; ARM6-NEXT: sbcs r1, r2, #1
; ARM6-NEXT: subs r2, r2, r3
; ARM6-NEXT: sbcs r1, r1, #1
; ARM6-NEXT: movlo r0, #1
; ARM6-NEXT: pop {r11, pc}
; ARM6-NEXT: .p2align 2
Expand All @@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM7-NEXT: mla r0, r1, r12, r0
; ARM7-NEXT: bic r0, r0, #-2147483648
; ARM7-NEXT: lsrs r0, r0, #1
; ARM7-NEXT: rrx r1, r3
; ARM7-NEXT: rrx r2, r3
; ARM7-NEXT: orr r0, r0, r3, lsl #30
; ARM7-NEXT: movw r3, #24026
; ARM7-NEXT: bic r2, r0, #-2147483648
; ARM7-NEXT: bic r1, r0, #-2147483648
; ARM7-NEXT: movt r3, #48461
; ARM7-NEXT: subs r1, r1, r3
; ARM7-NEXT: subs r2, r2, r3
; ARM7-NEXT: mov r0, #0
; ARM7-NEXT: sbcs r1, r2, #1
; ARM7-NEXT: sbcs r1, r1, #1
; ARM7-NEXT: movwlo r0, #1
; ARM7-NEXT: pop {r11, pc}
;
Expand All @@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; ARM8-NEXT: mla r0, r1, r12, r0
; ARM8-NEXT: bic r0, r0, #-2147483648
; ARM8-NEXT: lsrs r0, r0, #1
; ARM8-NEXT: rrx r1, r3
; ARM8-NEXT: rrx r2, r3
; ARM8-NEXT: orr r0, r0, r3, lsl #30
; ARM8-NEXT: movw r3, #24026
; ARM8-NEXT: bic r2, r0, #-2147483648
; ARM8-NEXT: bic r1, r0, #-2147483648
; ARM8-NEXT: movt r3, #48461
; ARM8-NEXT: subs r1, r1, r3
; ARM8-NEXT: subs r2, r2, r3
; ARM8-NEXT: mov r0, #0
; ARM8-NEXT: sbcs r1, r2, #1
; ARM8-NEXT: sbcs r1, r1, #1
; ARM8-NEXT: movwlo r0, #1
; ARM8-NEXT: pop {r11, pc}
;
Expand All @@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON7-NEXT: mla r0, r1, r12, r0
; NEON7-NEXT: bic r0, r0, #-2147483648
; NEON7-NEXT: lsrs r0, r0, #1
; NEON7-NEXT: rrx r1, r3
; NEON7-NEXT: rrx r2, r3
; NEON7-NEXT: orr r0, r0, r3, lsl #30
; NEON7-NEXT: movw r3, #24026
; NEON7-NEXT: bic r2, r0, #-2147483648
; NEON7-NEXT: bic r1, r0, #-2147483648
; NEON7-NEXT: movt r3, #48461
; NEON7-NEXT: subs r1, r1, r3
; NEON7-NEXT: subs r2, r2, r3
; NEON7-NEXT: mov r0, #0
; NEON7-NEXT: sbcs r1, r2, #1
; NEON7-NEXT: sbcs r1, r1, #1
; NEON7-NEXT: movwlo r0, #1
; NEON7-NEXT: pop {r11, pc}
;
Expand All @@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
; NEON8-NEXT: mla r0, r1, r12, r0
; NEON8-NEXT: bic r0, r0, #-2147483648
; NEON8-NEXT: lsrs r0, r0, #1
; NEON8-NEXT: rrx r1, r3
; NEON8-NEXT: rrx r2, r3
; NEON8-NEXT: orr r0, r0, r3, lsl #30
; NEON8-NEXT: movw r3, #24026
; NEON8-NEXT: bic r2, r0, #-2147483648
; NEON8-NEXT: bic r1, r0, #-2147483648
; NEON8-NEXT: movt r3, #48461
; NEON8-NEXT: subs r1, r1, r3
; NEON8-NEXT: subs r2, r2, r3
; NEON8-NEXT: mov r0, #0
; NEON8-NEXT: sbcs r1, r2, #1
; NEON8-NEXT: sbcs r1, r1, #1
; NEON8-NEXT: movwlo r0, #1
; NEON8-NEXT: pop {r11, pc}
%urem = urem i63 %X, 1234567890
Expand Down
Loading