Skip to content

Commit 637ab08

Browse files
committed
[ARM] Stop gluing 1-bit shifts
Use normal data flow instead. Several more nodes are still glued; I'll try to change that in subsequent patches.
1 parent f0b8025 commit 637ab08

File tree

4 files changed

+67
-48
lines changed

4 files changed

+67
-48
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
149149
cl::desc("Maximum interleave factor for MVE VLDn to generate."),
150150
cl::init(2));
151151

152+
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
153+
constexpr MVT FlagsVT = MVT::i32;
154+
152155
// The APCS parameter registers.
153156
static const MCPhysReg GPRArgRegs[] = {
154157
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -6850,7 +6853,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
68506853
// First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
68516854
// captures the result into a carry flag.
68526855
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6853-
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6856+
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, FlagsVT), Hi);
68546857

68556858
// The low part is an ARMISD::RRX operand, which shifts the carry in.
68566859
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
// ARM specific DAG Nodes.
1515
//
1616

17+
/// Value type used for "flags" operands / results (either CPSR or FPSCR_NZCV).
18+
defvar FlagsVT = i32;
19+
1720
// Type profiles.
1821
def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>,
1922
SDTCisVT<1, i32> ]>;
@@ -77,6 +80,18 @@ def SDT_ARMMEMCPY : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
7780
SDTCisVT<2, i32>, SDTCisVT<3, i32>,
7881
SDTCisVT<4, i32>]>;
7982

83+
def SDTIntUnaryOpWithFlagsOut : SDTypeProfile<2, 1, [
84+
SDTCisInt<0>, // result
85+
SDTCisVT<1, FlagsVT>, // out flags
86+
SDTCisSameAs<2, 0> // operand
87+
]>;
88+
89+
def SDTIntUnaryOpWithFlagsIn : SDTypeProfile<1, 2, [ // 1 result, 2 operands
90+
SDTCisInt<0>, // result (index 0)
91+
SDTCisSameAs<1, 0>, // operand (index 1), same type as the result
92+
SDTCisVT<2, FlagsVT> // in flags (index 2: results are numbered before operands)
93+
]>;
94+
8095
def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
8196
[SDTCisSameAs<0, 2>,
8297
SDTCisSameAs<0, 3>,
@@ -191,9 +206,9 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
191206
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
192207
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
193208

194-
def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
195-
def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
196-
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
209+
def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOpWithFlagsOut>;
210+
def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOpWithFlagsOut>;
211+
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
197212

198213
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
199214
[SDNPCommutative]>;
@@ -3731,19 +3746,19 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>,
37313746

37323747
let Uses = [CPSR] in
37333748
def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
3734-
[(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP,
3735-
Requires<[IsARM]>, Sched<[WriteALU]>;
3749+
[(set GPR:$Rd, (ARMrrx GPR:$Rm, CPSR))]>,
3750+
UnaryDP, Requires<[IsARM]>, Sched<[WriteALU]>;
37363751

37373752
// These aren't really mov instructions, but we have to define them this way
37383753
// due to glue operands.
37393754

37403755
let Defs = [CPSR] in {
3741-
def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3742-
[(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
3743-
Sched<[WriteALU]>, Requires<[IsARM]>;
3744-
def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3745-
[(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
3746-
Sched<[WriteALU]>, Requires<[IsARM]>;
3756+
def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3757+
[(set GPR:$dst, CPSR, (ARMsrl_glue GPR:$src))]>,
3758+
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
3759+
def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
3760+
[(set GPR:$dst, CPSR, (ARMsra_glue GPR:$src))]>,
3761+
UnaryDP, Sched<[WriteALU]>, Requires<[IsARM]>;
37473762
}
37483763

37493764
//===----------------------------------------------------------------------===//

llvm/lib/Target/ARM/ARMInstrThumb2.td

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2787,8 +2787,9 @@ def : T2Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)),
27872787

27882788
let Uses = [CPSR] in {
27892789
def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2790-
"rrx", "\t$Rd, $Rm",
2791-
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm))]>, Sched<[WriteALU]> {
2790+
"rrx", "\t$Rd, $Rm",
2791+
[(set rGPR:$Rd, (ARMrrx rGPR:$Rm, CPSR))]>,
2792+
Sched<[WriteALU]> {
27922793
let Inst{31-27} = 0b11101;
27932794
let Inst{26-25} = 0b01;
27942795
let Inst{24-21} = 0b0010;
@@ -2801,11 +2802,11 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
28012802
}
28022803

28032804
let isCodeGenOnly = 1, Defs = [CPSR] in {
2804-
def t2MOVsrl_glue : T2TwoRegShiftImm<
2805-
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2806-
"lsrs", ".w\t$Rd, $Rm, #1",
2807-
[(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
2808-
Sched<[WriteALU]> {
2805+
def t2MOVsrl_glue
2806+
: T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2807+
"lsrs", ".w\t$Rd, $Rm, #1",
2808+
[(set rGPR:$Rd, CPSR, (ARMsrl_glue rGPR:$Rm))]>,
2809+
Sched<[WriteALU]> {
28092810
let Inst{31-27} = 0b11101;
28102811
let Inst{26-25} = 0b01;
28112812
let Inst{24-21} = 0b0010;
@@ -2816,11 +2817,11 @@ def t2MOVsrl_glue : T2TwoRegShiftImm<
28162817
let Inst{14-12} = 0b000;
28172818
let Inst{7-6} = 0b01;
28182819
}
2819-
def t2MOVsra_glue : T2TwoRegShiftImm<
2820-
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2821-
"asrs", ".w\t$Rd, $Rm, #1",
2822-
[(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
2823-
Sched<[WriteALU]> {
2820+
def t2MOVsra_glue
2821+
: T2TwoRegShiftImm<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
2822+
"asrs", ".w\t$Rd, $Rm, #1",
2823+
[(set rGPR:$Rd, CPSR, (ARMsra_glue rGPR:$Rm))]>,
2824+
Sched<[WriteALU]> {
28242825
let Inst{31-27} = 0b11101;
28252826
let Inst{26-25} = 0b01;
28262827
let Inst{24-21} = 0b0010;

llvm/test/CodeGen/ARM/urem-seteq-illegal-types.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -628,13 +628,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
628628
; ARM5-NEXT: mla r0, r1, r12, r4
629629
; ARM5-NEXT: bic r0, r0, #-2147483648
630630
; ARM5-NEXT: lsrs r0, r0, #1
631-
; ARM5-NEXT: rrx r1, r3
631+
; ARM5-NEXT: rrx r2, r3
632632
; ARM5-NEXT: orr r0, r0, r3, lsl #30
633633
; ARM5-NEXT: ldr r3, .LCPI5_2
634-
; ARM5-NEXT: bic r2, r0, #-2147483648
634+
; ARM5-NEXT: bic r1, r0, #-2147483648
635635
; ARM5-NEXT: mov r0, #0
636-
; ARM5-NEXT: subs r1, r1, r3
637-
; ARM5-NEXT: sbcs r1, r2, #1
636+
; ARM5-NEXT: subs r2, r2, r3
637+
; ARM5-NEXT: sbcs r1, r1, #1
638638
; ARM5-NEXT: movlo r0, #1
639639
; ARM5-NEXT: pop {r4, pc}
640640
; ARM5-NEXT: .p2align 2
@@ -656,13 +656,13 @@ define i1 @test_urem_larger(i63 %X) nounwind {
656656
; ARM6-NEXT: mla r0, r1, r12, r0
657657
; ARM6-NEXT: bic r0, r0, #-2147483648
658658
; ARM6-NEXT: lsrs r0, r0, #1
659-
; ARM6-NEXT: rrx r1, r3
659+
; ARM6-NEXT: rrx r2, r3
660660
; ARM6-NEXT: orr r0, r0, r3, lsl #30
661661
; ARM6-NEXT: ldr r3, .LCPI5_2
662-
; ARM6-NEXT: bic r2, r0, #-2147483648
662+
; ARM6-NEXT: bic r1, r0, #-2147483648
663663
; ARM6-NEXT: mov r0, #0
664-
; ARM6-NEXT: subs r1, r1, r3
665-
; ARM6-NEXT: sbcs r1, r2, #1
664+
; ARM6-NEXT: subs r2, r2, r3
665+
; ARM6-NEXT: sbcs r1, r1, #1
666666
; ARM6-NEXT: movlo r0, #1
667667
; ARM6-NEXT: pop {r11, pc}
668668
; ARM6-NEXT: .p2align 2
@@ -686,14 +686,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
686686
; ARM7-NEXT: mla r0, r1, r12, r0
687687
; ARM7-NEXT: bic r0, r0, #-2147483648
688688
; ARM7-NEXT: lsrs r0, r0, #1
689-
; ARM7-NEXT: rrx r1, r3
689+
; ARM7-NEXT: rrx r2, r3
690690
; ARM7-NEXT: orr r0, r0, r3, lsl #30
691691
; ARM7-NEXT: movw r3, #24026
692-
; ARM7-NEXT: bic r2, r0, #-2147483648
692+
; ARM7-NEXT: bic r1, r0, #-2147483648
693693
; ARM7-NEXT: movt r3, #48461
694-
; ARM7-NEXT: subs r1, r1, r3
694+
; ARM7-NEXT: subs r2, r2, r3
695695
; ARM7-NEXT: mov r0, #0
696-
; ARM7-NEXT: sbcs r1, r2, #1
696+
; ARM7-NEXT: sbcs r1, r1, #1
697697
; ARM7-NEXT: movwlo r0, #1
698698
; ARM7-NEXT: pop {r11, pc}
699699
;
@@ -709,14 +709,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
709709
; ARM8-NEXT: mla r0, r1, r12, r0
710710
; ARM8-NEXT: bic r0, r0, #-2147483648
711711
; ARM8-NEXT: lsrs r0, r0, #1
712-
; ARM8-NEXT: rrx r1, r3
712+
; ARM8-NEXT: rrx r2, r3
713713
; ARM8-NEXT: orr r0, r0, r3, lsl #30
714714
; ARM8-NEXT: movw r3, #24026
715-
; ARM8-NEXT: bic r2, r0, #-2147483648
715+
; ARM8-NEXT: bic r1, r0, #-2147483648
716716
; ARM8-NEXT: movt r3, #48461
717-
; ARM8-NEXT: subs r1, r1, r3
717+
; ARM8-NEXT: subs r2, r2, r3
718718
; ARM8-NEXT: mov r0, #0
719-
; ARM8-NEXT: sbcs r1, r2, #1
719+
; ARM8-NEXT: sbcs r1, r1, #1
720720
; ARM8-NEXT: movwlo r0, #1
721721
; ARM8-NEXT: pop {r11, pc}
722722
;
@@ -732,14 +732,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
732732
; NEON7-NEXT: mla r0, r1, r12, r0
733733
; NEON7-NEXT: bic r0, r0, #-2147483648
734734
; NEON7-NEXT: lsrs r0, r0, #1
735-
; NEON7-NEXT: rrx r1, r3
735+
; NEON7-NEXT: rrx r2, r3
736736
; NEON7-NEXT: orr r0, r0, r3, lsl #30
737737
; NEON7-NEXT: movw r3, #24026
738-
; NEON7-NEXT: bic r2, r0, #-2147483648
738+
; NEON7-NEXT: bic r1, r0, #-2147483648
739739
; NEON7-NEXT: movt r3, #48461
740-
; NEON7-NEXT: subs r1, r1, r3
740+
; NEON7-NEXT: subs r2, r2, r3
741741
; NEON7-NEXT: mov r0, #0
742-
; NEON7-NEXT: sbcs r1, r2, #1
742+
; NEON7-NEXT: sbcs r1, r1, #1
743743
; NEON7-NEXT: movwlo r0, #1
744744
; NEON7-NEXT: pop {r11, pc}
745745
;
@@ -755,14 +755,14 @@ define i1 @test_urem_larger(i63 %X) nounwind {
755755
; NEON8-NEXT: mla r0, r1, r12, r0
756756
; NEON8-NEXT: bic r0, r0, #-2147483648
757757
; NEON8-NEXT: lsrs r0, r0, #1
758-
; NEON8-NEXT: rrx r1, r3
758+
; NEON8-NEXT: rrx r2, r3
759759
; NEON8-NEXT: orr r0, r0, r3, lsl #30
760760
; NEON8-NEXT: movw r3, #24026
761-
; NEON8-NEXT: bic r2, r0, #-2147483648
761+
; NEON8-NEXT: bic r1, r0, #-2147483648
762762
; NEON8-NEXT: movt r3, #48461
763-
; NEON8-NEXT: subs r1, r1, r3
763+
; NEON8-NEXT: subs r2, r2, r3
764764
; NEON8-NEXT: mov r0, #0
765-
; NEON8-NEXT: sbcs r1, r2, #1
765+
; NEON8-NEXT: sbcs r1, r1, #1
766766
; NEON8-NEXT: movwlo r0, #1
767767
; NEON8-NEXT: pop {r11, pc}
768768
%urem = urem i63 %X, 1234567890

0 commit comments

Comments
 (0)