Skip to content

Commit f0580c7

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Select negative arithmetic immediates in manual selector
Previously, we only handled negative arithmetic immediates in the imported selector code. Since we don't import code for, say, compares, we were missing opportunities for things like

```
%cst:gpr(s64) = G_CONSTANT i64 -10
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
->
%adds = ADDSXri %reg0, 10, 0, implicit-def $nzcv
%cmp = CSINCWr $wzr, $wzr, 1, implicit $nzcv
```

Instead, we would have to materialize the constant and emit a SUBS.

This adds support for selection like above for SUB, SUBS, ADD, and ADDS.

This is a 0.1% geomean code size improvement on SPECINT2000 at -Os.

Differential Revision: https://reviews.llvm.org/D91108
1 parent f23c4c6 commit f0580c7

File tree

6 files changed

+167
-27
lines changed

6 files changed

+167
-27
lines changed

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -176,26 +176,19 @@ class AArch64InstructionSelector : public InstructionSelector {
176176
std::initializer_list<llvm::SrcOp> SrcOps,
177177
MachineIRBuilder &MIRBuilder,
178178
const ComplexRendererFns &RenderFns = None) const;
179-
/// Helper function to emit a binary operation such as an ADD, ADDS, etc.
180-
///
181-
/// This is intended for instructions with the following opcode variants:
182-
///
183-
/// - Xri, Wri (arithmetic immediate form)
184-
/// - Xrs, Wrs (shifted register form)
185-
/// - Xrr, Wrr (register form)
186-
///
187-
/// For example, for ADD, we have ADDXri, ADDWri, ADDXrs, etc.
179+
/// Helper function to emit an add or sub instruction.
188180
///
189181
/// \p AddrModeAndSizeToOpcode must contain each of the opcode variants below
190182
/// in a specific order.
191183
///
192184
/// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
193185
///
194186
/// \code
195-
/// const std::array<std::array<unsigned, 2>, 3> Table {
187+
/// const std::array<std::array<unsigned, 2>, 4> Table {
196188
/// {{AArch64::ADDXri, AArch64::ADDWri},
197189
/// {AArch64::ADDXrs, AArch64::ADDWrs},
198-
/// {AArch64::ADDXrr, AArch64::ADDWrr}}};
190+
/// {AArch64::ADDXrr, AArch64::ADDWrr},
191+
/// {AArch64::SUBXri, AArch64::SUBWri}}};
199192
/// \endcode
200193
///
201194
/// Each row in the table corresponds to a different addressing mode. Each
@@ -205,6 +198,7 @@ class AArch64InstructionSelector : public InstructionSelector {
205198
/// - Row 0: The ri opcode variants
206199
/// - Row 1: The rs opcode variants
207200
/// - Row 2: The rr opcode variants
201+
/// - Row 3: The ri opcode variants for negative immediates
208202
///
209203
/// \attention Columns must be structured as follows:
210204
/// - Column 0: The 64-bit opcode variants
@@ -213,8 +207,8 @@ class AArch64InstructionSelector : public InstructionSelector {
213207
/// \p Dst is the destination register of the binop to emit.
214208
/// \p LHS is the left-hand operand of the binop to emit.
215209
/// \p RHS is the right-hand operand of the binop to emit.
216-
MachineInstr *emitBinOp(
217-
const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
210+
MachineInstr *emitAddSub(
211+
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
218212
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
219213
MachineIRBuilder &MIRBuilder) const;
220214
MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
@@ -3826,8 +3820,8 @@ MachineInstr *AArch64InstructionSelector::emitInstr(
38263820
return &*MI;
38273821
}
38283822

3829-
MachineInstr *AArch64InstructionSelector::emitBinOp(
3830-
const std::array<std::array<unsigned, 2>, 3> &AddrModeAndSizeToOpcode,
3823+
MachineInstr *AArch64InstructionSelector::emitAddSub(
3824+
const std::array<std::array<unsigned, 2>, 4> &AddrModeAndSizeToOpcode,
38313825
Register Dst, MachineOperand &LHS, MachineOperand &RHS,
38323826
MachineIRBuilder &MIRBuilder) const {
38333827
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -3837,9 +3831,18 @@ MachineInstr *AArch64InstructionSelector::emitBinOp(
38373831
unsigned Size = Ty.getSizeInBits();
38383832
assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
38393833
bool Is32Bit = Size == 32;
3834+
3835+
// INSTRri form with positive arithmetic immediate.
38403836
if (auto Fns = selectArithImmed(RHS))
38413837
return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
38423838
MIRBuilder, Fns);
3839+
3840+
// INSTRri form with negative arithmetic immediate.
3841+
if (auto Fns = selectNegArithImmed(RHS))
3842+
return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
3843+
MIRBuilder, Fns);
3844+
3845+
// INSTRrs form.
38433846
if (auto Fns = selectShiftedRegister(RHS))
38443847
return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
38453848
MIRBuilder, Fns);
@@ -3851,33 +3854,36 @@ MachineInstr *
38513854
AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
38523855
MachineOperand &RHS,
38533856
MachineIRBuilder &MIRBuilder) const {
3854-
const std::array<std::array<unsigned, 2>, 3> OpcTable{
3857+
const std::array<std::array<unsigned, 2>, 4> OpcTable{
38553858
{{AArch64::ADDXri, AArch64::ADDWri},
38563859
{AArch64::ADDXrs, AArch64::ADDWrs},
3857-
{AArch64::ADDXrr, AArch64::ADDWrr}}};
3858-
return emitBinOp(OpcTable, DefReg, LHS, RHS, MIRBuilder);
3860+
{AArch64::ADDXrr, AArch64::ADDWrr},
3861+
{AArch64::SUBXri, AArch64::SUBWri}}};
3862+
return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
38593863
}
38603864

38613865
MachineInstr *
38623866
AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
38633867
MachineOperand &RHS,
38643868
MachineIRBuilder &MIRBuilder) const {
3865-
const std::array<std::array<unsigned, 2>, 3> OpcTable{
3869+
const std::array<std::array<unsigned, 2>, 4> OpcTable{
38663870
{{AArch64::ADDSXri, AArch64::ADDSWri},
38673871
{AArch64::ADDSXrs, AArch64::ADDSWrs},
3868-
{AArch64::ADDSXrr, AArch64::ADDSWrr}}};
3869-
return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
3872+
{AArch64::ADDSXrr, AArch64::ADDSWrr},
3873+
{AArch64::SUBSXri, AArch64::SUBSWri}}};
3874+
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
38703875
}
38713876

38723877
MachineInstr *
38733878
AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
38743879
MachineOperand &RHS,
38753880
MachineIRBuilder &MIRBuilder) const {
3876-
const std::array<std::array<unsigned, 2>, 3> OpcTable{
3881+
const std::array<std::array<unsigned, 2>, 4> OpcTable{
38773882
{{AArch64::SUBSXri, AArch64::SUBSWri},
38783883
{AArch64::SUBSXrs, AArch64::SUBSWrs},
3879-
{AArch64::SUBSXrr, AArch64::SUBSWrr}}};
3880-
return emitBinOp(OpcTable, Dst, LHS, RHS, MIRBuilder);
3884+
{AArch64::SUBSXrr, AArch64::SUBSWrr},
3885+
{AArch64::ADDSXri, AArch64::ADDSWri}}};
3886+
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
38813887
}
38823888

38833889
MachineInstr *

llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-compare.mir

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,3 +577,29 @@ body: |
577577
%select:gpr(s32) = G_SELECT %cmp_trunc(s1), %one, %zero
578578
$w0 = COPY %select(s32)
579579
RET_ReallyLR implicit $w0
580+
581+
...
582+
---
583+
name: cmn_s32_neg_imm
584+
alignment: 4
585+
legalized: true
586+
regBankSelected: true
587+
tracksRegLiveness: true
588+
body: |
589+
bb.0:
590+
liveins: $w0, $w1
591+
592+
; CHECK-LABEL: name: cmn_s32_neg_imm
593+
; CHECK: liveins: $w0, $w1
594+
; CHECK: %reg0:gpr32sp = COPY $w0
595+
; CHECK: $wzr = SUBSWri %reg0, 1, 0, implicit-def $nzcv
596+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv
597+
; CHECK: $w0 = COPY %cmp
598+
; CHECK: RET_ReallyLR implicit $w0
599+
%reg0:gpr(s32) = COPY $w0
600+
%negative_one:gpr(s32) = G_CONSTANT i32 -1
601+
%zero:gpr(s32) = G_CONSTANT i32 0
602+
%sub:gpr(s32) = G_SUB %zero, %negative_one
603+
%cmp:gpr(s32) = G_ICMP intpred(ne), %reg0(s32), %sub
604+
$w0 = COPY %cmp(s32)
605+
RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/GlobalISel/select-cmp.mir

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,70 @@ body: |
115115
%5:gpr(s32) = G_ICMP intpred(eq), %0(s32), %2
116116
$w0 = COPY %5(s32)
117117
RET_ReallyLR implicit $w0
118+
119+
...
120+
---
121+
name: cmp_neg_imm_32
122+
legalized: true
123+
regBankSelected: true
124+
tracksRegLiveness: true
125+
body: |
126+
bb.1:
127+
liveins: $w0
128+
; CHECK-LABEL: name: cmp_neg_imm_32
129+
; CHECK: liveins: $w0
130+
; CHECK: %reg0:gpr32sp = COPY $w0
131+
; CHECK: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 10, 0, implicit-def $nzcv
132+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
133+
; CHECK: $w0 = COPY %cmp
134+
; CHECK: RET_ReallyLR implicit $w0
135+
%reg0:gpr(s32) = COPY $w0
136+
%cst:gpr(s32) = G_CONSTANT i32 -10
137+
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
138+
$w0 = COPY %cmp(s32)
139+
RET_ReallyLR implicit $w0
140+
141+
...
142+
---
143+
name: cmp_neg_imm_64
144+
legalized: true
145+
regBankSelected: true
146+
tracksRegLiveness: true
147+
body: |
148+
bb.1:
149+
liveins: $x0
150+
; CHECK-LABEL: name: cmp_neg_imm_64
151+
; CHECK: liveins: $x0
152+
; CHECK: %reg0:gpr64sp = COPY $x0
153+
; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %reg0, 10, 0, implicit-def $nzcv
154+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
155+
; CHECK: $w0 = COPY %cmp
156+
; CHECK: RET_ReallyLR implicit $w0
157+
%reg0:gpr(s64) = COPY $x0
158+
%cst:gpr(s64) = G_CONSTANT i64 -10
159+
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s64), %cst
160+
$w0 = COPY %cmp(s32)
161+
RET_ReallyLR implicit $w0
162+
163+
...
164+
---
165+
name: cmp_neg_imm_invalid
166+
legalized: true
167+
regBankSelected: true
168+
tracksRegLiveness: true
169+
body: |
170+
bb.1:
171+
liveins: $w0
172+
; CHECK-LABEL: name: cmp_neg_imm_invalid
173+
; CHECK: liveins: $w0
174+
; CHECK: %reg0:gpr32 = COPY $w0
175+
; CHECK: %cst:gpr32 = MOVi32imm -5000
176+
; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr %reg0, %cst, implicit-def $nzcv
177+
; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 1, implicit $nzcv
178+
; CHECK: $w0 = COPY %cmp
179+
; CHECK: RET_ReallyLR implicit $w0
180+
%reg0:gpr(s32) = COPY $w0
181+
%cst:gpr(s32) = G_CONSTANT i32 -5000
182+
%cmp:gpr(s32) = G_ICMP intpred(eq), %reg0(s32), %cst
183+
$w0 = COPY %cmp(s32)
184+
RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/GlobalISel/select-ptr-add.mir

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,20 @@ body: |
7272
%3:fpr(<2 x p0>) = G_PTR_ADD %0, %1(<2 x s64>)
7373
$q0 = COPY %3(<2 x p0>)
7474
...
75+
---
76+
name: ptr_add_neg_imm
77+
legalized: true
78+
regBankSelected: true
79+
body: |
80+
bb.0:
81+
liveins: $x0
82+
; CHECK-LABEL: name: ptr_add_neg_imm
83+
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
84+
; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
85+
; CHECK: [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[COPY1]], 10, 0, implicit-def $nzcv
86+
; CHECK: $x0 = COPY [[SUBSXri]]
87+
%0:gpr(p0) = COPY $x0
88+
%1:gpr(s64) = G_CONSTANT i64 -10
89+
%2:gpr(p0) = G_PTR_ADD %0, %1(s64)
90+
$x0 = COPY %2(p0)
91+
...

llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,28 @@ body: |
111111
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy1, %shift
112112
$w0 = COPY %add(s32)
113113
RET_ReallyLR implicit $w0
114+
115+
...
116+
---
117+
name: uaddo_s32_neg_imm
118+
alignment: 4
119+
legalized: true
120+
regBankSelected: true
121+
tracksRegLiveness: true
122+
body: |
123+
bb.1.entry:
124+
liveins: $w0, $w1, $x2
125+
; Check that we get SUBSWri when we can fold in a negative constant.
126+
;
127+
; CHECK-LABEL: name: uaddo_s32_neg_imm
128+
; CHECK: liveins: $w0, $w1, $x2
129+
; CHECK: %copy:gpr32sp = COPY $w0
130+
; CHECK: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
131+
; CHECK: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
132+
; CHECK: $w0 = COPY %add
133+
; CHECK: RET_ReallyLR implicit $w0
134+
%copy:gpr(s32) = COPY $w0
135+
%constant:gpr(s32) = G_CONSTANT i32 -16
136+
%add:gpr(s32), %overflow:gpr(s1) = G_UADDO %copy, %constant
137+
$w0 = COPY %add(s32)
138+
RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,9 +99,8 @@ body: |
9999
; CHECK: bb.0:
100100
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
101101
; CHECK: %copy:gpr64 = COPY $x0
102-
; CHECK: %negative_one:gpr64 = MOVi64imm -1
103-
; CHECK: %and:gpr64common = ANDXri %copy, 8000
104-
; CHECK: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr %and, %negative_one, implicit-def $nzcv
102+
; CHECK: %and:gpr64sp = ANDXri %copy, 8000
103+
; CHECK: [[ADDSXri:%[0-9]+]]:gpr64 = ADDSXri %and, 1, 0, implicit-def $nzcv
105104
; CHECK: Bcc 12, %bb.1, implicit $nzcv
106105
; CHECK: B %bb.0
107106
; CHECK: bb.1:

0 commit comments

Comments
 (0)