Skip to content

Commit dbaa189

Browse files
authored
[RISCV] Generate more W instructions
We rename `TuneNoStripWSuffix` to `TunePreferWInst`. If all the users of an instruction just use the low 32 bits, we can convert it to its W variant.

A quick test on Coremark (`-O3 -march=rv64gc`):

|        | W instructions | code size(.text) |
|--------|----------------|------------------|
| before | 302            | 12257            |
| after  | 343            | 12265            |
|        | +13.58%        | +0.065%          |

Reviewers: asb, dtcxzyw, preames, lukel97, michaelmaitland, topperc

Reviewed By: topperc, dtcxzyw

Pull Request: #87237
1 parent a169d4c commit dbaa189

File tree

5 files changed

+447
-89
lines changed

5 files changed

+447
-89
lines changed

llvm/lib/Target/RISCV/RISCVFeatures.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,9 +1226,9 @@ def TuneNoSinkSplatOperands
12261226
"false", "Disable sink splat operands to enable .vx, .vf,"
12271227
".wx, and .wf instructions">;
12281228

1229-
def TuneNoStripWSuffix
1230-
: SubtargetFeature<"no-strip-w-suffix", "EnableStripWSuffix", "false",
1231-
"Disable strip W suffix">;
1229+
def TunePreferWInst
1230+
: SubtargetFeature<"prefer-w-inst", "PreferWInst", "true",
1231+
"Prefer instructions with W suffix">;
12321232

12331233
def TuneConditionalCompressedMoveFusion
12341234
: SubtargetFeature<"conditional-cmv-fusion", "HasConditionalCompressedMoveFusion",

llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp

Lines changed: 77 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,24 @@
1212
// extended bits aren't consumed or because the input was already sign extended
1313
// by an earlier instruction.
1414
//
15-
// Then it removes the -w suffix from opw instructions whenever all users are
16-
// dependent only on the lower word of the result of the instruction.
17-
// The cases handled are:
18-
// * addw because c.add has a larger register encoding than c.addw.
19-
// * addiw because it helps reduce test differences between RV32 and RV64
20-
// w/o being a pessimization.
21-
// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
22-
// * slliw because c.slliw doesn't exist and c.slli does
15+
// Then:
16+
// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17+
// it removes the -w suffix from opw instructions whenever all users are
18+
// dependent only on the lower word of the result of the instruction.
19+
// The cases handled are:
20+
// * addw because c.add has a larger register encoding than c.addw.
21+
// * addiw because it helps reduce test differences between RV32 and RV64
22+
// w/o being a pessimization.
23+
// * mulw because c.mulw doesn't exist but c.mul does (w/ zcb)
24+
// * slliw because c.slliw doesn't exist and c.slli does
2325
//
26+
// 2. Or if explicitly enabled or the target prefers instructions with W suffix,
27+
// it adds the W suffix to the instruction whenever all users are dependent
28+
// only on the lower word of the result of the instruction.
29+
// The cases handled are:
30+
// * add/addi/sub/mul.
31+
// * slli with imm < 32.
32+
// * ld/lwu.
2433
//===---------------------------------------------------------------------===//
2534

2635
#include "RISCV.h"
@@ -60,6 +69,8 @@ class RISCVOptWInstrs : public MachineFunctionPass {
6069
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
6170
bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
6271
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
72+
bool appendWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
73+
const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
6374

6475
void getAnalysisUsage(AnalysisUsage &AU) const override {
6576
AU.setPreservesCFG();
@@ -672,9 +683,6 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
672683
const RISCVInstrInfo &TII,
673684
const RISCVSubtarget &ST,
674685
MachineRegisterInfo &MRI) {
675-
if (DisableStripWSuffix || !ST.enableStripWSuffix())
676-
return false;
677-
678686
bool MadeChange = false;
679687
for (MachineBasicBlock &MBB : MF) {
680688
for (MachineInstr &MI : MBB) {
@@ -698,6 +706,58 @@ bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
698706
return MadeChange;
699707
}
700708

709+
bool RISCVOptWInstrs::appendWSuffixes(MachineFunction &MF,
710+
const RISCVInstrInfo &TII,
711+
const RISCVSubtarget &ST,
712+
MachineRegisterInfo &MRI) {
713+
bool MadeChange = false;
714+
for (MachineBasicBlock &MBB : MF) {
715+
for (MachineInstr &MI : MBB) {
716+
unsigned WOpc;
717+
// TODO: Add more?
718+
switch (MI.getOpcode()) {
719+
default:
720+
continue;
721+
case RISCV::ADD:
722+
WOpc = RISCV::ADDW;
723+
break;
724+
case RISCV::ADDI:
725+
WOpc = RISCV::ADDIW;
726+
break;
727+
case RISCV::SUB:
728+
WOpc = RISCV::SUBW;
729+
break;
730+
case RISCV::MUL:
731+
WOpc = RISCV::MULW;
732+
break;
733+
case RISCV::SLLI:
734+
// SLLIW reads the lowest 5 bits, while SLLI reads lowest 6 bits
735+
if (MI.getOperand(2).getImm() >= 32)
736+
continue;
737+
WOpc = RISCV::SLLIW;
738+
break;
739+
case RISCV::LD:
740+
case RISCV::LWU:
741+
WOpc = RISCV::LW;
742+
break;
743+
}
744+
745+
if (hasAllWUsers(MI, ST, MRI)) {
746+
LLVM_DEBUG(dbgs() << "Replacing " << MI);
747+
MI.setDesc(TII.get(WOpc));
748+
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
749+
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
750+
MI.clearFlag(MachineInstr::MIFlag::IsExact);
751+
LLVM_DEBUG(dbgs() << " with " << MI);
752+
++NumTransformedToWInstrs;
753+
MadeChange = true;
754+
}
755+
}
756+
}
757+
758+
return MadeChange;
759+
}
760+
701761
bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
702762
if (skipFunction(MF.getFunction()))
703763
return false;
@@ -711,7 +771,12 @@ bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
711771

712772
bool MadeChange = false;
713773
MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
714-
MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
774+
775+
if (!(DisableStripWSuffix || ST.preferWInst()))
776+
MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
777+
778+
if (ST.preferWInst())
779+
MadeChange |= appendWSuffixes(MF, TII, ST, MRI);
715780

716781
return MadeChange;
717782
}
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv64 -mattr=+m -verify-machineinstrs < %s \
3+
; RUN: | FileCheck -check-prefixes=NO-PREFER-W-INST %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+m -riscv-disable-strip-w-suffix -verify-machineinstrs < %s \
5+
; RUN: | FileCheck -check-prefixes=NO-STRIP %s
6+
; RUN: llc -mtriple=riscv64 -mattr=+m,+prefer-w-inst -verify-machineinstrs < %s \
7+
; RUN: | FileCheck -check-prefixes=PREFER-W-INST %s
8+
9+
define i32 @addiw(i32 %a) {
10+
; NO-PREFER-W-INST-LABEL: addiw:
11+
; NO-PREFER-W-INST: # %bb.0:
12+
; NO-PREFER-W-INST-NEXT: lui a1, 1
13+
; NO-PREFER-W-INST-NEXT: addi a1, a1, -1
14+
; NO-PREFER-W-INST-NEXT: addw a0, a0, a1
15+
; NO-PREFER-W-INST-NEXT: ret
16+
;
17+
; NO-STRIP-LABEL: addiw:
18+
; NO-STRIP: # %bb.0:
19+
; NO-STRIP-NEXT: lui a1, 1
20+
; NO-STRIP-NEXT: addiw a1, a1, -1
21+
; NO-STRIP-NEXT: addw a0, a0, a1
22+
; NO-STRIP-NEXT: ret
23+
;
24+
; PREFER-W-INST-LABEL: addiw:
25+
; PREFER-W-INST: # %bb.0:
26+
; PREFER-W-INST-NEXT: lui a1, 1
27+
; PREFER-W-INST-NEXT: addiw a1, a1, -1
28+
; PREFER-W-INST-NEXT: addw a0, a0, a1
29+
; PREFER-W-INST-NEXT: ret
30+
%ret = add i32 %a, 4095
31+
ret i32 %ret
32+
}
33+
34+
define i32 @addw(i32 %a, i32 %b) {
35+
; NO-PREFER-W-INST-LABEL: addw:
36+
; NO-PREFER-W-INST: # %bb.0:
37+
; NO-PREFER-W-INST-NEXT: add a0, a0, a1
38+
; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024
39+
; NO-PREFER-W-INST-NEXT: ret
40+
;
41+
; NO-STRIP-LABEL: addw:
42+
; NO-STRIP: # %bb.0:
43+
; NO-STRIP-NEXT: addw a0, a0, a1
44+
; NO-STRIP-NEXT: addiw a0, a0, 1024
45+
; NO-STRIP-NEXT: ret
46+
;
47+
; PREFER-W-INST-LABEL: addw:
48+
; PREFER-W-INST: # %bb.0:
49+
; PREFER-W-INST-NEXT: addw a0, a0, a1
50+
; PREFER-W-INST-NEXT: addiw a0, a0, 1024
51+
; PREFER-W-INST-NEXT: ret
52+
%add = add i32 %a, %b
53+
%ret = add i32 %add, 1024
54+
ret i32 %ret
55+
}
56+
57+
define i32 @mulw(i32 %a, i32 %b) {
58+
; NO-PREFER-W-INST-LABEL: mulw:
59+
; NO-PREFER-W-INST: # %bb.0:
60+
; NO-PREFER-W-INST-NEXT: mul a1, a0, a1
61+
; NO-PREFER-W-INST-NEXT: mul a0, a0, a1
62+
; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024
63+
; NO-PREFER-W-INST-NEXT: ret
64+
;
65+
; NO-STRIP-LABEL: mulw:
66+
; NO-STRIP: # %bb.0:
67+
; NO-STRIP-NEXT: mulw a1, a0, a1
68+
; NO-STRIP-NEXT: mulw a0, a0, a1
69+
; NO-STRIP-NEXT: addiw a0, a0, 1024
70+
; NO-STRIP-NEXT: ret
71+
;
72+
; PREFER-W-INST-LABEL: mulw:
73+
; PREFER-W-INST: # %bb.0:
74+
; PREFER-W-INST-NEXT: mulw a1, a0, a1
75+
; PREFER-W-INST-NEXT: mulw a0, a0, a1
76+
; PREFER-W-INST-NEXT: addiw a0, a0, 1024
77+
; PREFER-W-INST-NEXT: ret
78+
%mul1 = mul i32 %a, %b
79+
%mul = mul i32 %a, %mul1
80+
%ret = add i32 %mul, 1024
81+
ret i32 %ret
82+
}
83+
84+
define i32 @slliw(i32 %a) {
85+
; NO-PREFER-W-INST-LABEL: slliw:
86+
; NO-PREFER-W-INST: # %bb.0:
87+
; NO-PREFER-W-INST-NEXT: slli a0, a0, 1
88+
; NO-PREFER-W-INST-NEXT: addiw a0, a0, 1024
89+
; NO-PREFER-W-INST-NEXT: ret
90+
;
91+
; NO-STRIP-LABEL: slliw:
92+
; NO-STRIP: # %bb.0:
93+
; NO-STRIP-NEXT: slliw a0, a0, 1
94+
; NO-STRIP-NEXT: addiw a0, a0, 1024
95+
; NO-STRIP-NEXT: ret
96+
;
97+
; PREFER-W-INST-LABEL: slliw:
98+
; PREFER-W-INST: # %bb.0:
99+
; PREFER-W-INST-NEXT: slliw a0, a0, 1
100+
; PREFER-W-INST-NEXT: addiw a0, a0, 1024
101+
; PREFER-W-INST-NEXT: ret
102+
%shl = shl i32 %a, 1
103+
%ret = add i32 %shl, 1024
104+
ret i32 %ret
105+
}

0 commit comments

Comments
 (0)