Skip to content

Commit c161720

Browse files
authored
[RISCV] Slightly improve expanded multiply emulation in getVLENFactoredAmount. (#84113)
Instead of initializing the accumulator to 0, initialize it on first assignment with a mv from the register that holds VLENB << ShiftAmount. Also fix a missing kill flag on the final ADD. I have no real interest in this case; it is just an easy optimization I noticed.
1 parent 7755c26 commit c161720

File tree

4 files changed

+30
-37
lines changed

4 files changed

+30
-37
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3130,11 +3130,7 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
31303130
.addReg(N, RegState::Kill)
31313131
.setMIFlag(Flag);
31323132
} else {
3133-
Register Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3134-
BuildMI(MBB, II, DL, get(RISCV::ADDI), Acc)
3135-
.addReg(RISCV::X0)
3136-
.addImm(0)
3137-
.setMIFlag(Flag);
3133+
Register Acc;
31383134
uint32_t PrevShiftAmount = 0;
31393135
for (uint32_t ShiftAmount = 0; NumOfVReg >> ShiftAmount; ShiftAmount++) {
31403136
if (NumOfVReg & (1U << ShiftAmount)) {
@@ -3143,17 +3139,27 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
31433139
.addReg(DestReg, RegState::Kill)
31443140
.addImm(ShiftAmount - PrevShiftAmount)
31453141
.setMIFlag(Flag);
3146-
if (NumOfVReg >> (ShiftAmount + 1))
3147-
BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3148-
.addReg(Acc, RegState::Kill)
3149-
.addReg(DestReg)
3150-
.setMIFlag(Flag);
3142+
if (NumOfVReg >> (ShiftAmount + 1)) {
3143+
// If we don't have an accumulator yet, create it and copy DestReg.
3144+
if (!Acc) {
3145+
Acc = MRI.createVirtualRegister(&RISCV::GPRRegClass);
3146+
BuildMI(MBB, II, DL, get(TargetOpcode::COPY), Acc)
3147+
.addReg(DestReg)
3148+
.setMIFlag(Flag);
3149+
} else {
3150+
BuildMI(MBB, II, DL, get(RISCV::ADD), Acc)
3151+
.addReg(Acc, RegState::Kill)
3152+
.addReg(DestReg)
3153+
.setMIFlag(Flag);
3154+
}
3155+
}
31513156
PrevShiftAmount = ShiftAmount;
31523157
}
31533158
}
3159+
assert(Acc && "Expected valid accumulator");
31543160
BuildMI(MBB, II, DL, get(RISCV::ADD), DestReg)
31553161
.addReg(DestReg, RegState::Kill)
3156-
.addReg(Acc)
3162+
.addReg(Acc, RegState::Kill)
31573163
.setMIFlag(Flag);
31583164
}
31593165
}

llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -253,9 +253,8 @@ define void @lmul4_and_2_x2_1() nounwind {
253253
; NOMUL-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
254254
; NOMUL-NEXT: addi s0, sp, 48
255255
; NOMUL-NEXT: csrr a0, vlenb
256-
; NOMUL-NEXT: li a1, 0
257256
; NOMUL-NEXT: slli a0, a0, 2
258-
; NOMUL-NEXT: add a1, a1, a0
257+
; NOMUL-NEXT: mv a1, a0
259258
; NOMUL-NEXT: slli a0, a0, 1
260259
; NOMUL-NEXT: add a0, a0, a1
261260
; NOMUL-NEXT: sub sp, sp, a0
@@ -455,9 +454,8 @@ define void @lmul_8_x5() nounwind {
455454
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
456455
; NOMUL-NEXT: addi s0, sp, 80
457456
; NOMUL-NEXT: csrr a0, vlenb
458-
; NOMUL-NEXT: li a1, 0
459457
; NOMUL-NEXT: slli a0, a0, 3
460-
; NOMUL-NEXT: add a1, a1, a0
458+
; NOMUL-NEXT: mv a1, a0
461459
; NOMUL-NEXT: slli a0, a0, 2
462460
; NOMUL-NEXT: add a0, a0, a1
463461
; NOMUL-NEXT: sub sp, sp, a0
@@ -517,9 +515,8 @@ define void @lmul_8_x9() nounwind {
517515
; NOMUL-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
518516
; NOMUL-NEXT: addi s0, sp, 80
519517
; NOMUL-NEXT: csrr a0, vlenb
520-
; NOMUL-NEXT: li a1, 0
521518
; NOMUL-NEXT: slli a0, a0, 3
522-
; NOMUL-NEXT: add a1, a1, a0
519+
; NOMUL-NEXT: mv a1, a0
523520
; NOMUL-NEXT: slli a0, a0, 3
524521
; NOMUL-NEXT: add a0, a0, a1
525522
; NOMUL-NEXT: sub sp, sp, a0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2133,9 +2133,8 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
21332133
; CHECK-NEXT: addi sp, sp, -16
21342134
; CHECK-NEXT: .cfi_def_cfa_offset 16
21352135
; CHECK-NEXT: csrr a1, vlenb
2136-
; CHECK-NEXT: li a2, 0
21372136
; CHECK-NEXT: slli a1, a1, 3
2138-
; CHECK-NEXT: add a2, a2, a1
2137+
; CHECK-NEXT: mv a2, a1
21392138
; CHECK-NEXT: slli a1, a1, 1
21402139
; CHECK-NEXT: add a1, a1, a2
21412140
; CHECK-NEXT: sub sp, sp, a1
@@ -2256,9 +2255,8 @@ define float @vreduce_fminimum_v128f32(ptr %x) {
22562255
; CHECK-NEXT: vfmin.vv v8, v11, v8
22572256
; CHECK-NEXT: vfmv.f.s fa0, v8
22582257
; CHECK-NEXT: csrr a0, vlenb
2259-
; CHECK-NEXT: li a1, 0
22602258
; CHECK-NEXT: slli a0, a0, 3
2261-
; CHECK-NEXT: add a1, a1, a0
2259+
; CHECK-NEXT: mv a1, a0
22622260
; CHECK-NEXT: slli a0, a0, 1
22632261
; CHECK-NEXT: add a0, a0, a1
22642262
; CHECK-NEXT: add sp, sp, a0
@@ -2739,9 +2737,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
27392737
; CHECK-NEXT: addi sp, sp, -16
27402738
; CHECK-NEXT: .cfi_def_cfa_offset 16
27412739
; CHECK-NEXT: csrr a1, vlenb
2742-
; CHECK-NEXT: li a2, 0
27432740
; CHECK-NEXT: slli a1, a1, 3
2744-
; CHECK-NEXT: add a2, a2, a1
2741+
; CHECK-NEXT: mv a2, a1
27452742
; CHECK-NEXT: slli a1, a1, 1
27462743
; CHECK-NEXT: add a1, a1, a2
27472744
; CHECK-NEXT: sub sp, sp, a1
@@ -2852,9 +2849,8 @@ define double @vreduce_fminimum_v64f64(ptr %x) {
28522849
; CHECK-NEXT: vfmin.vv v8, v11, v8
28532850
; CHECK-NEXT: vfmv.f.s fa0, v8
28542851
; CHECK-NEXT: csrr a0, vlenb
2855-
; CHECK-NEXT: li a1, 0
28562852
; CHECK-NEXT: slli a0, a0, 3
2857-
; CHECK-NEXT: add a1, a1, a0
2853+
; CHECK-NEXT: mv a1, a0
28582854
; CHECK-NEXT: slli a0, a0, 1
28592855
; CHECK-NEXT: add a0, a0, a1
28602856
; CHECK-NEXT: add sp, sp, a0
@@ -3461,9 +3457,8 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
34613457
; CHECK-NEXT: addi sp, sp, -16
34623458
; CHECK-NEXT: .cfi_def_cfa_offset 16
34633459
; CHECK-NEXT: csrr a1, vlenb
3464-
; CHECK-NEXT: li a2, 0
34653460
; CHECK-NEXT: slli a1, a1, 3
3466-
; CHECK-NEXT: add a2, a2, a1
3461+
; CHECK-NEXT: mv a2, a1
34673462
; CHECK-NEXT: slli a1, a1, 1
34683463
; CHECK-NEXT: add a1, a1, a2
34693464
; CHECK-NEXT: sub sp, sp, a1
@@ -3584,9 +3579,8 @@ define float @vreduce_fmaximum_v128f32(ptr %x) {
35843579
; CHECK-NEXT: vfmax.vv v8, v11, v8
35853580
; CHECK-NEXT: vfmv.f.s fa0, v8
35863581
; CHECK-NEXT: csrr a0, vlenb
3587-
; CHECK-NEXT: li a1, 0
35883582
; CHECK-NEXT: slli a0, a0, 3
3589-
; CHECK-NEXT: add a1, a1, a0
3583+
; CHECK-NEXT: mv a1, a0
35903584
; CHECK-NEXT: slli a0, a0, 1
35913585
; CHECK-NEXT: add a0, a0, a1
35923586
; CHECK-NEXT: add sp, sp, a0
@@ -4067,9 +4061,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
40674061
; CHECK-NEXT: addi sp, sp, -16
40684062
; CHECK-NEXT: .cfi_def_cfa_offset 16
40694063
; CHECK-NEXT: csrr a1, vlenb
4070-
; CHECK-NEXT: li a2, 0
40714064
; CHECK-NEXT: slli a1, a1, 3
4072-
; CHECK-NEXT: add a2, a2, a1
4065+
; CHECK-NEXT: mv a2, a1
40734066
; CHECK-NEXT: slli a1, a1, 1
40744067
; CHECK-NEXT: add a1, a1, a2
40754068
; CHECK-NEXT: sub sp, sp, a1
@@ -4180,9 +4173,8 @@ define double @vreduce_fmaximum_v64f64(ptr %x) {
41804173
; CHECK-NEXT: vfmax.vv v8, v11, v8
41814174
; CHECK-NEXT: vfmv.f.s fa0, v8
41824175
; CHECK-NEXT: csrr a0, vlenb
4183-
; CHECK-NEXT: li a1, 0
41844176
; CHECK-NEXT: slli a0, a0, 3
4185-
; CHECK-NEXT: add a1, a1, a0
4177+
; CHECK-NEXT: mv a1, a0
41864178
; CHECK-NEXT: slli a0, a0, 1
41874179
; CHECK-NEXT: add a0, a0, a1
41884180
; CHECK-NEXT: add sp, sp, a0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,8 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
193193
; VLA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
194194
; VLA-NEXT: vmv1r.v v16, v15
195195
; VLA-NEXT: csrr a0, vlenb
196-
; VLA-NEXT: li a1, 0
197196
; VLA-NEXT: slli a0, a0, 3
198-
; VLA-NEXT: add a1, a1, a0
197+
; VLA-NEXT: mv a1, a0
199198
; VLA-NEXT: slli a0, a0, 1
200199
; VLA-NEXT: add a0, a0, a1
201200
; VLA-NEXT: add a0, sp, a0
@@ -245,9 +244,8 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
245244
; VLA-NEXT: li a0, 32
246245
; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma
247246
; VLA-NEXT: csrr a0, vlenb
248-
; VLA-NEXT: li a1, 0
249247
; VLA-NEXT: slli a0, a0, 3
250-
; VLA-NEXT: add a1, a1, a0
248+
; VLA-NEXT: mv a1, a0
251249
; VLA-NEXT: slli a0, a0, 1
252250
; VLA-NEXT: add a0, a0, a1
253251
; VLA-NEXT: add a0, sp, a0

0 commit comments

Comments
 (0)