Skip to content

Commit d1e17a3

Browse files
authored
[RISCV][GISel] Custom promote s32 G_SHL/ASHR/LSHR on RV64. (#115559)
On RV64, s32 shifts are custom-legalized to the G_SLLW/G_SRAW/G_SRLW pseudos unless the shift amount is constant. In that case we zero-extend the shift amount and promote the other input the same way widenScalar would. I'm not using widenScalar because it requires a separate call for each operand, so it was easier to handle both operands at once.
1 parent f9125dd commit d1e17a3

26 files changed

+1300
-1428
lines changed

llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
147147
.lower();
148148

149149
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
150-
.legalFor({{s32, s32}, {sXLen, sXLen}})
150+
.legalFor({{sXLen, sXLen}})
151+
.customFor(ST.is64Bit(), {{s32, s32}})
151152
.widenScalarToNextPow2(0)
152-
.clampScalar(1, s32, sXLen)
153-
.clampScalar(0, s32, sXLen)
154-
.minScalarSameAs(1, 0)
155-
.maxScalarSameAs(1, 0);
153+
.clampScalar(1, sXLen, sXLen)
154+
.clampScalar(0, sXLen, sXLen);
156155

157156
auto &ExtActions =
158157
getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
@@ -1166,6 +1165,12 @@ static unsigned getRISCVWOpcode(unsigned Opcode) {
11661165
switch (Opcode) {
11671166
default:
11681167
llvm_unreachable("Unexpected opcode");
1168+
case TargetOpcode::G_ASHR:
1169+
return RISCV::G_SRAW;
1170+
case TargetOpcode::G_LSHR:
1171+
return RISCV::G_SRLW;
1172+
case TargetOpcode::G_SHL:
1173+
return RISCV::G_SLLW;
11691174
case TargetOpcode::G_SDIV:
11701175
return RISCV::G_DIVW;
11711176
case TargetOpcode::G_UDIV:
@@ -1223,6 +1228,34 @@ bool RISCVLegalizerInfo::legalizeCustom(
12231228
return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
12241229
LegalizerHelper::Legalized;
12251230
}
1231+
case TargetOpcode::G_ASHR:
1232+
case TargetOpcode::G_LSHR:
1233+
case TargetOpcode::G_SHL: {
1234+
if (getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI)) {
1235+
// We don't need a custom node for shift by constant. Just widen the
1236+
// source and the shift amount.
1237+
unsigned ExtOpc = TargetOpcode::G_ANYEXT;
1238+
if (MI.getOpcode() == TargetOpcode::G_ASHR)
1239+
ExtOpc = TargetOpcode::G_SEXT;
1240+
else if (MI.getOpcode() == TargetOpcode::G_LSHR)
1241+
ExtOpc = TargetOpcode::G_ZEXT;
1242+
1243+
Helper.Observer.changingInstr(MI);
1244+
Helper.widenScalarSrc(MI, sXLen, 1, ExtOpc);
1245+
Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ZEXT);
1246+
Helper.widenScalarDst(MI, sXLen);
1247+
Helper.Observer.changedInstr(MI);
1248+
return true;
1249+
}
1250+
1251+
Helper.Observer.changingInstr(MI);
1252+
Helper.widenScalarSrc(MI, sXLen, 1, TargetOpcode::G_ANYEXT);
1253+
Helper.widenScalarSrc(MI, sXLen, 2, TargetOpcode::G_ANYEXT);
1254+
Helper.widenScalarDst(MI, sXLen);
1255+
MI.setDesc(MIRBuilder.getTII().get(getRISCVWOpcode(MI.getOpcode())));
1256+
Helper.Observer.changedInstr(MI);
1257+
return true;
1258+
}
12261259
case TargetOpcode::G_SDIV:
12271260
case TargetOpcode::G_UDIV:
12281261
case TargetOpcode::G_UREM:

llvm/lib/Target/RISCV/RISCVGISel.td

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,12 @@ def : LdPat<load, LD, PtrVT>;
169169
def : StPat<store, SD, GPR, PtrVT>;
170170
}
171171

172+
let Predicates = [IsRV64] in {
173+
// FIXME: Temporary until i32->i64 zext is no longer legal.
174+
def : Pat <(srl (zext GPR:$rs1), uimm5:$shamt),
175+
(SRLIW GPR:$rs1, uimm5:$shamt)>;
176+
}
177+
172178
//===----------------------------------------------------------------------===//
173179
// RV64 i32 patterns not used by SelectionDAG
174180
//===----------------------------------------------------------------------===//
@@ -195,17 +201,6 @@ def : Pat<(anyext GPR:$src), (COPY GPR:$src)>;
195201
def : Pat<(sext GPR:$src), (ADDIW GPR:$src, 0)>;
196202
def : Pat<(trunc GPR:$src), (COPY GPR:$src)>;
197203

198-
def : PatGprGpr<shl, SLLW, i32, i32>;
199-
def : PatGprGpr<srl, SRLW, i32, i32>;
200-
def : PatGprGpr<sra, SRAW, i32, i32>;
201-
202-
def : Pat<(i32 (shl GPR:$rs1, uimm5i32:$imm)),
203-
(SLLIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
204-
def : Pat<(i32 (srl GPR:$rs1, uimm5i32:$imm)),
205-
(SRLIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
206-
def : Pat<(i32 (sra GPR:$rs1, uimm5i32:$imm)),
207-
(SRAIW GPR:$rs1, (i64 (as_i64imm $imm)))>;
208-
209204
// Use sext if the sign bit of the input is 0.
210205
def : Pat<(zext_is_sext GPR:$src), (ADDIW GPR:$src, 0)>;
211206
}

llvm/lib/Target/RISCV/RISCVInstrGISel.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,30 @@ class RISCVGenericInstruction : GenericInstruction {
1717
let Namespace = "RISCV";
1818
}
1919

20+
// Pseudo equivalent to a RISCVISD::SRAW.
21+
def G_SRAW : RISCVGenericInstruction {
22+
let OutOperandList = (outs type0:$dst);
23+
let InOperandList = (ins type0:$src1, type0:$src2);
24+
let hasSideEffects = false;
25+
}
26+
def : GINodeEquiv<G_SRAW, riscv_sraw>;
27+
28+
// Pseudo equivalent to a RISCVISD::SRLW.
29+
def G_SRLW : RISCVGenericInstruction {
30+
let OutOperandList = (outs type0:$dst);
31+
let InOperandList = (ins type0:$src1, type0:$src2);
32+
let hasSideEffects = false;
33+
}
34+
def : GINodeEquiv<G_SRLW, riscv_srlw>;
35+
36+
// Pseudo equivalent to a RISCVISD::SLLW.
37+
def G_SLLW : RISCVGenericInstruction {
38+
let OutOperandList = (outs type0:$dst);
39+
let InOperandList = (ins type0:$src1, type0:$src2);
40+
let hasSideEffects = false;
41+
}
42+
def : GINodeEquiv<G_SLLW, riscv_sllw>;
43+
2044
// Pseudo equivalent to a RISCVISD::DIVW.
2145
def G_DIVW : RISCVGenericInstruction {
2246
let OutOperandList = (outs type0:$dst);

llvm/test/CodeGen/RISCV/GlobalISel/alu-roundtrip.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ define i32 @slli_i32(i32 %a) {
171171
;
172172
; RV64IM-LABEL: slli_i32:
173173
; RV64IM: # %bb.0: # %entry
174-
; RV64IM-NEXT: slliw a0, a0, 11
174+
; RV64IM-NEXT: slli a0, a0, 11
175175
; RV64IM-NEXT: ret
176176
entry:
177177
%0 = shl i32 %a, 11

llvm/test/CodeGen/RISCV/GlobalISel/bitmanip.ll

Lines changed: 43 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
; RUN: llc -mtriple=riscv32 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV32
33
; RUN: llc -mtriple=riscv64 -global-isel -global-isel-abort=1 < %s 2>&1 | FileCheck %s --check-prefixes=RV64
44

5-
; FIXME: andi a0, a0, 1 is unneeded
65
define i2 @bitreverse_i2(i2 %x) {
76
; RV32-LABEL: bitreverse_i2:
87
; RV32: # %bb.0:
@@ -18,15 +17,13 @@ define i2 @bitreverse_i2(i2 %x) {
1817
; RV64-NEXT: slli a1, a0, 1
1918
; RV64-NEXT: andi a1, a1, 2
2019
; RV64-NEXT: andi a0, a0, 3
21-
; RV64-NEXT: srliw a0, a0, 1
22-
; RV64-NEXT: andi a0, a0, 1
20+
; RV64-NEXT: srli a0, a0, 1
2321
; RV64-NEXT: or a0, a1, a0
2422
; RV64-NEXT: ret
2523
%rev = call i2 @llvm.bitreverse.i2(i2 %x)
2624
ret i2 %rev
2725
}
2826

29-
; FIXME: andi a0, a0, 1 is unneeded
3027
define i3 @bitreverse_i3(i3 %x) {
3128
; RV32-LABEL: bitreverse_i3:
3229
; RV32: # %bb.0:
@@ -46,15 +43,13 @@ define i3 @bitreverse_i3(i3 %x) {
4643
; RV64-NEXT: andi a0, a0, 7
4744
; RV64-NEXT: andi a2, a0, 2
4845
; RV64-NEXT: or a1, a1, a2
49-
; RV64-NEXT: srliw a0, a0, 2
50-
; RV64-NEXT: andi a0, a0, 1
46+
; RV64-NEXT: srli a0, a0, 2
5147
; RV64-NEXT: or a0, a1, a0
5248
; RV64-NEXT: ret
5349
%rev = call i3 @llvm.bitreverse.i3(i3 %x)
5450
ret i3 %rev
5551
}
5652

57-
; FIXME: andi a0, a0, 1 is unneeded
5853
define i4 @bitreverse_i4(i4 %x) {
5954
; RV32-LABEL: bitreverse_i4:
6055
; RV32: # %bb.0:
@@ -79,18 +74,16 @@ define i4 @bitreverse_i4(i4 %x) {
7974
; RV64-NEXT: andi a2, a2, 4
8075
; RV64-NEXT: or a1, a1, a2
8176
; RV64-NEXT: andi a0, a0, 15
82-
; RV64-NEXT: srliw a2, a0, 1
77+
; RV64-NEXT: srli a2, a0, 1
8378
; RV64-NEXT: andi a2, a2, 2
8479
; RV64-NEXT: or a1, a1, a2
85-
; RV64-NEXT: srliw a0, a0, 3
86-
; RV64-NEXT: andi a0, a0, 1
80+
; RV64-NEXT: srli a0, a0, 3
8781
; RV64-NEXT: or a0, a1, a0
8882
; RV64-NEXT: ret
8983
%rev = call i4 @llvm.bitreverse.i4(i4 %x)
9084
ret i4 %rev
9185
}
9286

93-
; FIXME: andi a0, a0, 1 is unneeded
9487
define i7 @bitreverse_i7(i7 %x) {
9588
; RV32-LABEL: bitreverse_i7:
9689
; RV32: # %bb.0:
@@ -122,20 +115,20 @@ define i7 @bitreverse_i7(i7 %x) {
122115
; RV64-NEXT: slli a2, a0, 4
123116
; RV64-NEXT: andi a2, a2, 32
124117
; RV64-NEXT: or a1, a1, a2
125-
; RV64-NEXT: slli a2, a0, 2
126-
; RV64-NEXT: andi a2, a2, 16
118+
; RV64-NEXT: li a2, 2
119+
; RV64-NEXT: slli a3, a0, 2
120+
; RV64-NEXT: andi a3, a3, 16
127121
; RV64-NEXT: andi a0, a0, 127
128-
; RV64-NEXT: andi a3, a0, 8
129-
; RV64-NEXT: or a2, a2, a3
122+
; RV64-NEXT: andi a4, a0, 8
123+
; RV64-NEXT: or a3, a3, a4
124+
; RV64-NEXT: or a1, a1, a3
125+
; RV64-NEXT: srli a3, a0, 2
126+
; RV64-NEXT: andi a3, a3, 4
127+
; RV64-NEXT: srli a4, a0, 4
128+
; RV64-NEXT: and a2, a4, a2
129+
; RV64-NEXT: or a2, a3, a2
130130
; RV64-NEXT: or a1, a1, a2
131-
; RV64-NEXT: srliw a2, a0, 2
132-
; RV64-NEXT: andi a2, a2, 4
133-
; RV64-NEXT: srliw a3, a0, 4
134-
; RV64-NEXT: andi a3, a3, 2
135-
; RV64-NEXT: or a2, a2, a3
136-
; RV64-NEXT: or a1, a1, a2
137-
; RV64-NEXT: srliw a0, a0, 6
138-
; RV64-NEXT: andi a0, a0, 1
131+
; RV64-NEXT: srli a0, a0, 6
139132
; RV64-NEXT: or a0, a1, a0
140133
; RV64-NEXT: ret
141134
%rev = call i7 @llvm.bitreverse.i7(i7 %x)
@@ -179,39 +172,39 @@ define i24 @bitreverse_i24(i24 %x) {
179172
;
180173
; RV64-LABEL: bitreverse_i24:
181174
; RV64: # %bb.0:
182-
; RV64-NEXT: slli a1, a0, 16
183-
; RV64-NEXT: lui a2, 4096
184-
; RV64-NEXT: addi a2, a2, -1
185-
; RV64-NEXT: and a0, a0, a2
186-
; RV64-NEXT: srliw a0, a0, 16
187-
; RV64-NEXT: or a0, a0, a1
188-
; RV64-NEXT: lui a1, 65521
189-
; RV64-NEXT: addi a1, a1, -241
190-
; RV64-NEXT: slli a1, a1, 4
191-
; RV64-NEXT: and a3, a1, a2
175+
; RV64-NEXT: lui a1, 4096
176+
; RV64-NEXT: addiw a1, a1, -1
177+
; RV64-NEXT: slli a2, a0, 16
178+
; RV64-NEXT: and a0, a0, a1
179+
; RV64-NEXT: srli a0, a0, 16
180+
; RV64-NEXT: or a0, a0, a2
181+
; RV64-NEXT: lui a2, 65521
182+
; RV64-NEXT: addiw a2, a2, -241
183+
; RV64-NEXT: slli a2, a2, 4
184+
; RV64-NEXT: and a3, a2, a1
192185
; RV64-NEXT: and a3, a0, a3
193-
; RV64-NEXT: srliw a3, a3, 4
186+
; RV64-NEXT: srli a3, a3, 4
194187
; RV64-NEXT: slli a0, a0, 4
195-
; RV64-NEXT: and a0, a0, a1
188+
; RV64-NEXT: and a0, a0, a2
196189
; RV64-NEXT: or a0, a3, a0
197-
; RV64-NEXT: lui a1, 261939
198-
; RV64-NEXT: addi a1, a1, 819
199-
; RV64-NEXT: slli a1, a1, 2
200-
; RV64-NEXT: and a3, a1, a2
190+
; RV64-NEXT: lui a2, 261939
191+
; RV64-NEXT: addiw a2, a2, 819
192+
; RV64-NEXT: slli a2, a2, 2
193+
; RV64-NEXT: and a3, a2, a1
201194
; RV64-NEXT: and a3, a0, a3
202-
; RV64-NEXT: srliw a3, a3, 2
195+
; RV64-NEXT: srli a3, a3, 2
203196
; RV64-NEXT: slli a0, a0, 2
204-
; RV64-NEXT: and a0, a0, a1
197+
; RV64-NEXT: and a0, a0, a2
205198
; RV64-NEXT: or a0, a3, a0
206-
; RV64-NEXT: lui a1, 523605
207-
; RV64-NEXT: addiw a1, a1, 1365
208-
; RV64-NEXT: slli a1, a1, 1
209-
; RV64-NEXT: and a2, a1, a2
210-
; RV64-NEXT: and a2, a0, a2
211-
; RV64-NEXT: srliw a2, a2, 1
212-
; RV64-NEXT: slliw a0, a0, 1
213-
; RV64-NEXT: and a0, a0, a1
214-
; RV64-NEXT: or a0, a2, a0
199+
; RV64-NEXT: lui a2, 523605
200+
; RV64-NEXT: addiw a2, a2, 1365
201+
; RV64-NEXT: slli a2, a2, 1
202+
; RV64-NEXT: and a1, a2, a1
203+
; RV64-NEXT: and a1, a0, a1
204+
; RV64-NEXT: srli a1, a1, 1
205+
; RV64-NEXT: slli a0, a0, 1
206+
; RV64-NEXT: and a0, a0, a2
207+
; RV64-NEXT: or a0, a1, a0
215208
; RV64-NEXT: ret
216209
%rev = call i24 @llvm.bitreverse.i24(i24 %x)
217210
ret i24 %rev

llvm/test/CodeGen/RISCV/GlobalISel/combine.ll

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,18 @@ define i32 @mul_to_shift(i32 %x) {
4242
; RV32-NEXT: slli a0, a0, 2
4343
; RV32-NEXT: ret
4444
;
45-
; RV64-LABEL: mul_to_shift:
46-
; RV64: # %bb.0:
47-
; RV64-NEXT: slliw a0, a0, 2
48-
; RV64-NEXT: ret
45+
; RV64-O0-LABEL: mul_to_shift:
46+
; RV64-O0: # %bb.0:
47+
; RV64-O0-NEXT: li a1, 2
48+
; RV64-O0-NEXT: sll a0, a0, a1
49+
; RV64-O0-NEXT: ret
50+
;
51+
; RV64-OPT-LABEL: mul_to_shift:
52+
; RV64-OPT: # %bb.0:
53+
; RV64-OPT-NEXT: slli a0, a0, 2
54+
; RV64-OPT-NEXT: ret
4955
%a = mul i32 %x, 4
5056
ret i32 %a
5157
}
58+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
59+
; RV64: {{.*}}

llvm/test/CodeGen/RISCV/GlobalISel/iabs.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ define i8 @abs8(i8 %x) {
3131
;
3232
; RV64I-LABEL: abs8:
3333
; RV64I: # %bb.0:
34-
; RV64I-NEXT: slli a1, a0, 24
35-
; RV64I-NEXT: sraiw a1, a1, 31
34+
; RV64I-NEXT: slli a1, a0, 56
35+
; RV64I-NEXT: srai a1, a1, 63
3636
; RV64I-NEXT: add a0, a0, a1
3737
; RV64I-NEXT: xor a0, a0, a1
3838
; RV64I-NEXT: ret
@@ -65,8 +65,8 @@ define i16 @abs16(i16 %x) {
6565
;
6666
; RV64I-LABEL: abs16:
6767
; RV64I: # %bb.0:
68-
; RV64I-NEXT: slli a1, a0, 16
69-
; RV64I-NEXT: sraiw a1, a1, 31
68+
; RV64I-NEXT: slli a1, a0, 48
69+
; RV64I-NEXT: srai a1, a1, 63
7070
; RV64I-NEXT: add a0, a0, a1
7171
; RV64I-NEXT: xor a0, a0, a1
7272
; RV64I-NEXT: ret

0 commit comments

Comments
 (0)