Skip to content

Commit 75358f0

Browse files
committed
[AArch64] Lower multiplication by a constant int to madd
Lower a = b * C - 1 into madd: a) instcombine changes b * C - 1 --> b * C + (-1); b) machine-combine changes b * C + (-1) --> madd. The assembler will transform the negative immediate of sub to add, see https://gcc.godbolt.org/z/cTcxePPf4. Fixes the AArch64 part of #57255. Reviewed By: efriedma. Differential Revision: https://reviews.llvm.org/D134336
1 parent d85f6e5 commit 75358f0

File tree

7 files changed

+114
-39
lines changed

7 files changed

+114
-39
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5796,7 +5796,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
57965796
case MachineCombinerPattern::MULADDXI_OP1: {
57975797
// MUL I=A,B,0
57985798
// ADD R,I,Imm
5799-
// ==> ORR V, ZR, Imm
5799+
// ==> MOV V, Imm
58005800
// ==> MADD R,A,B,V
58015801
// --- Create(MADD);
58025802
const TargetRegisterClass *OrrRC;
@@ -5824,13 +5824,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
58245824
Imm = Imm << Val;
58255825
}
58265826
uint64_t UImm = SignExtend64(Imm, BitSize);
5827-
uint64_t Encoding;
5828-
if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
5827+
// The immediate can be composed via a single instruction.
5828+
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
5829+
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
5830+
if (Insn.size() != 1)
58295831
return;
5830-
MachineInstrBuilder MIB1 =
5831-
BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5832-
.addReg(ZeroReg)
5833-
.addImm(Encoding);
5832+
auto MovI = Insn.begin();
5833+
MachineInstrBuilder MIB1;
5834+
// MOV is an alias for one of three instructions: movz, movn, and orr.
5835+
if (MovI->Opcode == OrrOpc)
5836+
MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5837+
.addReg(ZeroReg)
5838+
.addImm(MovI->Op2);
5839+
else {
5840+
if (BitSize == 32)
5841+
assert((MovI->Opcode == AArch64::MOVNWi ||
5842+
MovI->Opcode == AArch64::MOVZWi) &&
5843+
"Expected opcode");
5844+
else
5845+
assert((MovI->Opcode == AArch64::MOVNXi ||
5846+
MovI->Opcode == AArch64::MOVZXi) &&
5847+
"Expected opcode");
5848+
MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
5849+
.addImm(MovI->Op1)
5850+
.addImm(MovI->Op2);
5851+
}
58345852
InsInstrs.push_back(MIB1);
58355853
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
58365854
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
@@ -5888,7 +5906,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
58885906
case MachineCombinerPattern::MULSUBXI_OP1: {
58895907
// MUL I=A,B,0
58905908
// SUB R,I, Imm
5891-
// ==> ORR V, ZR, -Imm
5909+
// ==> MOV V, -Imm
58925910
// ==> MADD R,A,B,V // = -Imm + A*B
58935911
// --- Create(MADD);
58945912
const TargetRegisterClass *OrrRC;
@@ -5915,13 +5933,31 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
59155933
Imm = Imm << Val;
59165934
}
59175935
uint64_t UImm = SignExtend64(-Imm, BitSize);
5918-
uint64_t Encoding;
5919-
if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
5936+
// The immediate can be composed via a single instruction.
5937+
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
5938+
AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
5939+
if (Insn.size() != 1)
59205940
return;
5921-
MachineInstrBuilder MIB1 =
5922-
BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5923-
.addReg(ZeroReg)
5924-
.addImm(Encoding);
5941+
auto MovI = Insn.begin();
5942+
MachineInstrBuilder MIB1;
5943+
// MOV is an alias for one of three instructions: movz, movn, and orr.
5944+
if (MovI->Opcode == OrrOpc)
5945+
MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5946+
.addReg(ZeroReg)
5947+
.addImm(MovI->Op2);
5948+
else {
5949+
if (BitSize == 32)
5950+
assert((MovI->Opcode == AArch64::MOVNWi ||
5951+
MovI->Opcode == AArch64::MOVZWi) &&
5952+
"Expected opcode");
5953+
else
5954+
assert((MovI->Opcode == AArch64::MOVNXi ||
5955+
MovI->Opcode == AArch64::MOVZXi) &&
5956+
"Expected opcode");
5957+
MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(MovI->Opcode), NewVR)
5958+
.addImm(MovI->Op1)
5959+
.addImm(MovI->Op2);
5960+
}
59255961
InsInstrs.push_back(MIB1);
59265962
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
59275963
MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);

llvm/test/CodeGen/AArch64/addimm-mulimm.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ define i64 @addimm_mulimm_accept_00(i64 %a) {
55
; CHECK-LABEL: addimm_mulimm_accept_00:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: mov w8, #37
8-
; CHECK-NEXT: mul x8, x0, x8
9-
; CHECK-NEXT: add x0, x8, #1147
8+
; CHECK-NEXT: mov x9, #1147
9+
; CHECK-NEXT: madd x0, x0, x8, x9
1010
; CHECK-NEXT: ret
1111
%tmp0 = add i64 %a, 31
1212
%tmp1 = mul i64 %tmp0, 37
@@ -17,8 +17,8 @@ define i64 @addimm_mulimm_accept_01(i64 %a) {
1717
; CHECK-LABEL: addimm_mulimm_accept_01:
1818
; CHECK: // %bb.0:
1919
; CHECK-NEXT: mov w8, #37
20-
; CHECK-NEXT: mul x8, x0, x8
21-
; CHECK-NEXT: sub x0, x8, #1147
20+
; CHECK-NEXT: mov x9, #-1147
21+
; CHECK-NEXT: madd x0, x0, x8, x9
2222
; CHECK-NEXT: ret
2323
%tmp0 = add i64 %a, -31
2424
%tmp1 = mul i64 %tmp0, 37
@@ -29,8 +29,8 @@ define signext i32 @addimm_mulimm_accept_02(i32 signext %a) {
2929
; CHECK-LABEL: addimm_mulimm_accept_02:
3030
; CHECK: // %bb.0:
3131
; CHECK-NEXT: mov w8, #37
32-
; CHECK-NEXT: mul w8, w0, w8
33-
; CHECK-NEXT: add w0, w8, #1147
32+
; CHECK-NEXT: mov w9, #1147
33+
; CHECK-NEXT: madd w0, w0, w8, w9
3434
; CHECK-NEXT: ret
3535
%tmp0 = add i32 %a, 31
3636
%tmp1 = mul i32 %tmp0, 37
@@ -41,8 +41,8 @@ define signext i32 @addimm_mulimm_accept_03(i32 signext %a) {
4141
; CHECK-LABEL: addimm_mulimm_accept_03:
4242
; CHECK: // %bb.0:
4343
; CHECK-NEXT: mov w8, #37
44-
; CHECK-NEXT: mul w8, w0, w8
45-
; CHECK-NEXT: sub w0, w8, #1147
44+
; CHECK-NEXT: mov w9, #-1147
45+
; CHECK-NEXT: madd w0, w0, w8, w9
4646
; CHECK-NEXT: ret
4747
%tmp0 = add i32 %a, -31
4848
%tmp1 = mul i32 %tmp0, 37

llvm/test/CodeGen/AArch64/machine-outliner-throw.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define dso_local i32 @_Z5func1i(i32 %x) #0 {
1313
; CHECK-NEXT: .cfi_def_cfa_offset 16
1414
; CHECK-NEXT: .cfi_offset w19, -8
1515
; CHECK-NEXT: .cfi_offset w30, -16
16-
; CHECK-NEXT: orr w8, wzr, #0x1
16+
; CHECK-NEXT: mov w8, #1
1717
; CHECK-NEXT: madd w19, w0, w0, w8
1818
; CHECK-NEXT: mov w0, #4
1919
; CHECK-NEXT: bl __cxa_allocate_exception
@@ -37,7 +37,7 @@ define dso_local i32 @_Z5func2c(i8 %x) #0 {
3737
; CHECK-NEXT: .cfi_offset w30, -16
3838
; CHECK-NEXT: and w8, w0, #0xff
3939
; CHECK-NEXT: mov w0, #4
40-
; CHECK-NEXT: orr w9, wzr, #0x1
40+
; CHECK-NEXT: mov w9, #1
4141
; CHECK-NEXT: madd w19, w8, w8, w9
4242
; CHECK-NEXT: bl __cxa_allocate_exception
4343
; CHECK-NEXT: bl OUTLINED_FUNCTION_0

llvm/test/CodeGen/AArch64/madd-combiner.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
define i32 @mul_add_imm(i32 %a, i32 %b) {
77
; CHECK-LABEL: mul_add_imm:
88
; CHECK: ; %bb.0:
9-
; CHECK-NEXT: orr w8, wzr, #0x4
9+
; CHECK-NEXT: mov w8, #4
1010
; CHECK-NEXT: madd w0, w0, w1, w8
1111
; CHECK-NEXT: ret
1212
%1 = mul i32 %a, %b
@@ -39,7 +39,7 @@ define void @mul_add_imm2() {
3939
; CHECK-FAST-LABEL: mul_add_imm2:
4040
; CHECK-FAST: ; %bb.0: ; %entry
4141
; CHECK-FAST-NEXT: mov x8, #-3
42-
; CHECK-FAST-NEXT: orr x9, xzr, #0xfffffffffffffffd
42+
; CHECK-FAST-NEXT: mov x9, #-3
4343
; CHECK-FAST-NEXT: madd x8, x8, x8, x9
4444
; CHECK-FAST-NEXT: mov x9, #45968
4545
; CHECK-FAST-NEXT: movk x9, #48484, lsl #16

llvm/test/CodeGen/AArch64/mul_pow2.ll

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,45 @@ define i64 @test6_smnegl(i32 %x) {
290290
ret i64 %sub
291291
}
292292

293+
; We may hoist the "mov" instructions out of a loop
294+
define i32 @mull6_sub(i32 %x) {
295+
; CHECK-LABEL: mull6_sub:
296+
; CHECK: // %bb.0:
297+
; CHECK-NEXT: mov w8, #6
298+
; CHECK-NEXT: mov w9, #-1
299+
; CHECK-NEXT: madd w0, w0, w8, w9
300+
; CHECK-NEXT: ret
301+
;
302+
; GISEL-LABEL: mull6_sub:
303+
; GISEL: // %bb.0:
304+
; GISEL-NEXT: mov w8, #6
305+
; GISEL-NEXT: mov w9, #-1
306+
; GISEL-NEXT: madd w0, w0, w8, w9
307+
; GISEL-NEXT: ret
308+
%mul = mul nsw i32 %x, 6
309+
%sub = add nsw i32 %mul, -1
310+
ret i32 %sub
311+
}
312+
313+
define i64 @mull6_sub_orr(i64 %x) {
314+
; CHECK-LABEL: mull6_sub_orr:
315+
; CHECK: // %bb.0:
316+
; CHECK-NEXT: mov w8, #6
317+
; CHECK-NEXT: mov x9, #16773120
318+
; CHECK-NEXT: madd x0, x0, x8, x9
319+
; CHECK-NEXT: ret
320+
;
321+
; GISEL-LABEL: mull6_sub_orr:
322+
; GISEL: // %bb.0:
323+
; GISEL-NEXT: mov w8, #6
324+
; GISEL-NEXT: mov x9, #16773120
325+
; GISEL-NEXT: madd x0, x0, x8, x9
326+
; GISEL-NEXT: ret
327+
%mul = mul nsw i64 %x, 6
328+
%sub = add nsw i64 %mul, 16773120
329+
ret i64 %sub
330+
}
331+
293332
define i32 @test7(i32 %x) {
294333
; CHECK-LABEL: test7:
295334
; CHECK: // %bb.0:
@@ -731,11 +770,11 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) {
731770
;
732771
; GISEL-LABEL: muladd_demand_commute:
733772
; GISEL: // %bb.0:
734-
; GISEL-NEXT: adrp x8, .LCPI42_1
735-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI42_1]
736-
; GISEL-NEXT: adrp x8, .LCPI42_0
773+
; GISEL-NEXT: adrp x8, .LCPI44_1
774+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI44_1]
775+
; GISEL-NEXT: adrp x8, .LCPI44_0
737776
; GISEL-NEXT: mla v1.4s, v0.4s, v2.4s
738-
; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI42_0]
777+
; GISEL-NEXT: ldr q0, [x8, :lo12:.LCPI44_0]
739778
; GISEL-NEXT: and v0.16b, v1.16b, v0.16b
740779
; GISEL-NEXT: ret
741780
%m = mul <4 x i32> %x, <i32 131008, i32 131008, i32 131008, i32 131008>

llvm/test/CodeGen/AArch64/srem-seteq.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ define i32 @test_srem_odd_bit30(i32 %X) nounwind {
4747
; CHECK-LABEL: test_srem_odd_bit30:
4848
; CHECK: // %bb.0:
4949
; CHECK-NEXT: mov w8, #43691
50-
; CHECK-NEXT: orr w9, wzr, #0x1
50+
; CHECK-NEXT: mov w9, #1
5151
; CHECK-NEXT: movk w8, #27306, lsl #16
5252
; CHECK-NEXT: madd w8, w0, w8, w9
5353
; CHECK-NEXT: cmp w8, #3
@@ -64,7 +64,7 @@ define i32 @test_srem_odd_bit31(i32 %X) nounwind {
6464
; CHECK-LABEL: test_srem_odd_bit31:
6565
; CHECK: // %bb.0:
6666
; CHECK-NEXT: mov w8, #21845
67-
; CHECK-NEXT: orr w9, wzr, #0x1
67+
; CHECK-NEXT: mov w9, #1
6868
; CHECK-NEXT: movk w8, #54613, lsl #16
6969
; CHECK-NEXT: madd w8, w0, w8, w9
7070
; CHECK-NEXT: cmp w8, #3
@@ -122,7 +122,7 @@ define i32 @test_srem_even_bit30(i32 %X) nounwind {
122122
; CHECK-LABEL: test_srem_even_bit30:
123123
; CHECK: // %bb.0:
124124
; CHECK-NEXT: mov w8, #20165
125-
; CHECK-NEXT: orr w9, wzr, #0x8
125+
; CHECK-NEXT: mov w9, #8
126126
; CHECK-NEXT: movk w8, #64748, lsl #16
127127
; CHECK-NEXT: madd w8, w0, w8, w9
128128
; CHECK-NEXT: ror w8, w8, #3
@@ -140,7 +140,7 @@ define i32 @test_srem_even_bit31(i32 %X) nounwind {
140140
; CHECK-LABEL: test_srem_even_bit31:
141141
; CHECK: // %bb.0:
142142
; CHECK-NEXT: mov w8, #1285
143-
; CHECK-NEXT: orr w9, wzr, #0x2
143+
; CHECK-NEXT: mov w9, #2
144144
; CHECK-NEXT: movk w8, #50437, lsl #16
145145
; CHECK-NEXT: madd w8, w0, w8, w9
146146
; CHECK-NEXT: ror w8, w8, #1

llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,11 @@ define i1 @t32_6_3(i32 %X) nounwind {
137137
; CHECK-LABEL: t32_6_3:
138138
; CHECK: // %bb.0:
139139
; CHECK-NEXT: mov w8, #43691
140-
; CHECK-NEXT: mov w9, #43691
140+
; CHECK-NEXT: mov w9, #-1
141141
; CHECK-NEXT: movk w8, #43690, lsl #16
142+
; CHECK-NEXT: madd w8, w0, w8, w9
143+
; CHECK-NEXT: mov w9, #43691
142144
; CHECK-NEXT: movk w9, #10922, lsl #16
143-
; CHECK-NEXT: mul w8, w0, w8
144-
; CHECK-NEXT: sub w8, w8, #1
145145
; CHECK-NEXT: ror w8, w8, #1
146146
; CHECK-NEXT: cmp w8, w9
147147
; CHECK-NEXT: cset w0, lo
@@ -209,8 +209,8 @@ define i1 @t8_3_2(i8 %X) nounwind {
209209
; CHECK-LABEL: t8_3_2:
210210
; CHECK: // %bb.0:
211211
; CHECK-NEXT: mov w8, #-85
212-
; CHECK-NEXT: mul w8, w0, w8
213-
; CHECK-NEXT: sub w8, w8, #86
212+
; CHECK-NEXT: mov w9, #-86
213+
; CHECK-NEXT: madd w8, w0, w8, w9
214214
; CHECK-NEXT: and w8, w8, #0xff
215215
; CHECK-NEXT: cmp w8, #85
216216
; CHECK-NEXT: cset w0, lo

0 commit comments

Comments
 (0)