Skip to content

Commit ce6e4f2

Browse files
committed
[RISCV] Use fractional LMULs for fixed length types smaller than riscv-v-vector-bits-min.
My thought process is that if v2i64 is an LMUL=1 type then v2i32 should be an LMUL=1/2 type. We limit the fractional LMUL so that SEW=64 clips to LMUL=1, SEW=32 clips to LMUL=1/2, etc. This ensures there's always a fractional LMUL available to truncate a type. This does reduce the number of vsetvlis in some cases. Some tests increase vsetvlis because the best container type for a mask type is dependent on the LMUL+SEW that the mask was produced from, but you can't tell that from the type. I think this is something we need to solve in the machine IR when optimizing vsetvlis. Reviewed By: frasercrmck Differential Revision: https://reviews.llvm.org/D101215
1 parent 5f78ba0 commit ce6e4f2

File tree

58 files changed

+1840
-1824
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1840
-1824
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1216,24 +1216,20 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
12161216
switch (EltVT.SimpleTy) {
12171217
default:
12181218
llvm_unreachable("unexpected element type for RVV container");
1219-
case MVT::i1: {
1220-
// Masks are calculated assuming 8-bit elements since that's when we need
1221-
// the most elements.
1222-
MinVLen /= 8;
1223-
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1224-
unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / 8;
1225-
return MVT::getScalableVectorVT(MVT::i1, LMul * EltsPerBlock);
1226-
}
1219+
case MVT::i1:
12271220
case MVT::i8:
12281221
case MVT::i16:
12291222
case MVT::i32:
12301223
case MVT::i64:
12311224
case MVT::f16:
12321225
case MVT::f32:
12331226
case MVT::f64: {
1234-
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
1235-
unsigned EltsPerBlock = RISCV::RVVBitsPerBlock / EltVT.getSizeInBits();
1236-
return MVT::getScalableVectorVT(EltVT, LMul * EltsPerBlock);
1227+
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
1228+
// narrower types, but we can't have a fractional LMUL with denominator less
1229+
// than 64/SEW.
1230+
unsigned NumElts =
1231+
divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
1232+
return MVT::getScalableVectorVT(EltVT, NumElts);
12371233
}
12381234
}
12391235
}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ define <32 x i1> @bitcast_v4i8_v32i1(<4 x i8> %a, <32 x i1> %b) {
1717
define i8 @bitcast_v1i8_i8(<1 x i8> %a) {
1818
; CHECK-LABEL: bitcast_v1i8_i8:
1919
; CHECK: # %bb.0:
20-
; CHECK-NEXT: vsetvli zero, zero, e8,m1,ta,mu
20+
; CHECK-NEXT: vsetvli zero, zero, e8,mf8,ta,mu
2121
; CHECK-NEXT: vmv.x.s a0, v8
2222
; CHECK-NEXT: ret
2323
%b = bitcast <1 x i8> %a to i8
@@ -27,7 +27,7 @@ define i8 @bitcast_v1i8_i8(<1 x i8> %a) {
2727
define i16 @bitcast_v2i8_i16(<2 x i8> %a) {
2828
; CHECK-LABEL: bitcast_v2i8_i16:
2929
; CHECK: # %bb.0:
30-
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
30+
; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
3131
; CHECK-NEXT: vmv.x.s a0, v8
3232
; CHECK-NEXT: ret
3333
%b = bitcast <2 x i8> %a to i16
@@ -37,7 +37,7 @@ define i16 @bitcast_v2i8_i16(<2 x i8> %a) {
3737
define i16 @bitcast_v1i16_i16(<1 x i16> %a) {
3838
; CHECK-LABEL: bitcast_v1i16_i16:
3939
; CHECK: # %bb.0:
40-
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
40+
; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
4141
; CHECK-NEXT: vmv.x.s a0, v8
4242
; CHECK-NEXT: ret
4343
%b = bitcast <1 x i16> %a to i16
@@ -47,7 +47,7 @@ define i16 @bitcast_v1i16_i16(<1 x i16> %a) {
4747
define i32 @bitcast_v4i8_i32(<4 x i8> %a) {
4848
; CHECK-LABEL: bitcast_v4i8_i32:
4949
; CHECK: # %bb.0:
50-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
50+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
5151
; CHECK-NEXT: vmv.x.s a0, v8
5252
; CHECK-NEXT: ret
5353
%b = bitcast <4 x i8> %a to i32
@@ -57,7 +57,7 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a) {
5757
define i32 @bitcast_v2i16_i32(<2 x i16> %a) {
5858
; CHECK-LABEL: bitcast_v2i16_i32:
5959
; CHECK: # %bb.0:
60-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
60+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
6161
; CHECK-NEXT: vmv.x.s a0, v8
6262
; CHECK-NEXT: ret
6363
%b = bitcast <2 x i16> %a to i32
@@ -67,7 +67,7 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a) {
6767
define i32 @bitcast_v1i32_i32(<1 x i32> %a) {
6868
; CHECK-LABEL: bitcast_v1i32_i32:
6969
; CHECK: # %bb.0:
70-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
70+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
7171
; CHECK-NEXT: vmv.x.s a0, v8
7272
; CHECK-NEXT: ret
7373
%b = bitcast <1 x i32> %a to i32
@@ -153,7 +153,7 @@ define i64 @bitcast_v1i64_i64(<1 x i64> %a) {
153153
define half @bitcast_v2i8_f16(<2 x i8> %a) {
154154
; CHECK-LABEL: bitcast_v2i8_f16:
155155
; CHECK: # %bb.0:
156-
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
156+
; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
157157
; CHECK-NEXT: vmv.x.s a0, v8
158158
; CHECK-NEXT: ret
159159
%b = bitcast <2 x i8> %a to half
@@ -163,7 +163,7 @@ define half @bitcast_v2i8_f16(<2 x i8> %a) {
163163
define half @bitcast_v1i16_f16(<1 x i16> %a) {
164164
; CHECK-LABEL: bitcast_v1i16_f16:
165165
; CHECK: # %bb.0:
166-
; CHECK-NEXT: vsetvli zero, zero, e16,m1,ta,mu
166+
; CHECK-NEXT: vsetvli zero, zero, e16,mf4,ta,mu
167167
; CHECK-NEXT: vmv.x.s a0, v8
168168
; CHECK-NEXT: ret
169169
%b = bitcast <1 x i16> %a to half
@@ -173,7 +173,7 @@ define half @bitcast_v1i16_f16(<1 x i16> %a) {
173173
define float @bitcast_v4i8_f32(<4 x i8> %a) {
174174
; CHECK-LABEL: bitcast_v4i8_f32:
175175
; CHECK: # %bb.0:
176-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
176+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
177177
; CHECK-NEXT: vmv.x.s a0, v8
178178
; CHECK-NEXT: ret
179179
%b = bitcast <4 x i8> %a to float
@@ -183,7 +183,7 @@ define float @bitcast_v4i8_f32(<4 x i8> %a) {
183183
define float @bitcast_v2i16_f32(<2 x i16> %a) {
184184
; CHECK-LABEL: bitcast_v2i16_f32:
185185
; CHECK: # %bb.0:
186-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
186+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
187187
; CHECK-NEXT: vmv.x.s a0, v8
188188
; CHECK-NEXT: ret
189189
%b = bitcast <2 x i16> %a to float
@@ -193,7 +193,7 @@ define float @bitcast_v2i16_f32(<2 x i16> %a) {
193193
define float @bitcast_v1i32_f32(<1 x i32> %a) {
194194
; CHECK-LABEL: bitcast_v1i32_f32:
195195
; CHECK: # %bb.0:
196-
; CHECK-NEXT: vsetvli zero, zero, e32,m1,ta,mu
196+
; CHECK-NEXT: vsetvli zero, zero, e32,mf2,ta,mu
197197
; CHECK-NEXT: vmv.x.s a0, v8
198198
; CHECK-NEXT: ret
199199
%b = bitcast <1 x i32> %a to float
@@ -279,7 +279,7 @@ define double @bitcast_v1i64_f64(<1 x i64> %a) {
279279
define <1 x i16> @bitcast_i16_v1i16(i16 %a) {
280280
; CHECK-LABEL: bitcast_i16_v1i16:
281281
; CHECK: # %bb.0:
282-
; CHECK-NEXT: vsetivli a1, 1, e16,m1,ta,mu
282+
; CHECK-NEXT: vsetivli a1, 1, e16,mf4,ta,mu
283283
; CHECK-NEXT: vmv.v.x v8, a0
284284
; CHECK-NEXT: ret
285285
%b = bitcast i16 %a to <1 x i16>
@@ -289,13 +289,13 @@ define <1 x i16> @bitcast_i16_v1i16(i16 %a) {
289289
define <2 x i16> @bitcast_i32_v2i16(i32 %a) {
290290
; RV32-LABEL: bitcast_i32_v2i16:
291291
; RV32: # %bb.0:
292-
; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu
292+
; RV32-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
293293
; RV32-NEXT: vmv.s.x v8, a0
294294
; RV32-NEXT: ret
295295
;
296296
; RV64-LABEL: bitcast_i32_v2i16:
297297
; RV64: # %bb.0:
298-
; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
298+
; RV64-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
299299
; RV64-NEXT: vmv.v.x v8, a0
300300
; RV64-NEXT: ret
301301
%b = bitcast i32 %a to <2 x i16>
@@ -305,13 +305,13 @@ define <2 x i16> @bitcast_i32_v2i16(i32 %a) {
305305
define <1 x i32> @bitcast_i32_v1i32(i32 %a) {
306306
; RV32-LABEL: bitcast_i32_v1i32:
307307
; RV32: # %bb.0:
308-
; RV32-NEXT: vsetivli a1, 1, e32,m1,ta,mu
308+
; RV32-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
309309
; RV32-NEXT: vmv.s.x v8, a0
310310
; RV32-NEXT: ret
311311
;
312312
; RV64-LABEL: bitcast_i32_v1i32:
313313
; RV64: # %bb.0:
314-
; RV64-NEXT: vsetivli a1, 1, e32,m1,ta,mu
314+
; RV64-NEXT: vsetivli a1, 1, e32,mf2,ta,mu
315315
; RV64-NEXT: vmv.v.x v8, a0
316316
; RV64-NEXT: ret
317317
%b = bitcast i32 %a to <1 x i32>

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -381,7 +381,7 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
381381
; LMULMAX2-RV32-NEXT: lui a4, 4080
382382
; LMULMAX2-RV32-NEXT: vand.vx v27, v27, a4
383383
; LMULMAX2-RV32-NEXT: addi a5, zero, 5
384-
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
384+
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
385385
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
386386
; LMULMAX2-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
387387
; LMULMAX2-RV32-NEXT: vmv.v.i v28, 0
@@ -581,7 +581,7 @@ define void @bitreverse_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
581581
; LMULMAX1-RV32-NEXT: lui a4, 4080
582582
; LMULMAX1-RV32-NEXT: vand.vx v27, v27, a4
583583
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
584-
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
584+
; LMULMAX1-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
585585
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
586586
; LMULMAX1-RV32-NEXT: vsetivli a1, 4, e32,m1,ta,mu
587587
; LMULMAX1-RV32-NEXT: vmv.v.i v28, 0
@@ -1245,7 +1245,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
12451245
; LMULMAX2-RV32-NEXT: lui a4, 4080
12461246
; LMULMAX2-RV32-NEXT: vand.vx v30, v30, a4
12471247
; LMULMAX2-RV32-NEXT: addi a5, zero, 85
1248-
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,m1,ta,mu
1248+
; LMULMAX2-RV32-NEXT: vsetivli a1, 1, e8,mf8,ta,mu
12491249
; LMULMAX2-RV32-NEXT: vmv.s.x v0, a5
12501250
; LMULMAX2-RV32-NEXT: vsetivli a1, 8, e32,m2,ta,mu
12511251
; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
@@ -1447,7 +1447,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
14471447
; LMULMAX1-RV32-NEXT: lui a6, 4080
14481448
; LMULMAX1-RV32-NEXT: vand.vx v28, v26, a6
14491449
; LMULMAX1-RV32-NEXT: addi a5, zero, 5
1450-
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,m1,ta,mu
1450+
; LMULMAX1-RV32-NEXT: vsetivli a2, 1, e8,mf8,ta,mu
14511451
; LMULMAX1-RV32-NEXT: vmv.s.x v0, a5
14521452
; LMULMAX1-RV32-NEXT: vsetivli a2, 4, e32,m1,ta,mu
14531453
; LMULMAX1-RV32-NEXT: vmv.v.i v26, 0

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
define <4 x i8> @ret_v4i8(<4 x i8>* %p) {
88
; CHECK-LABEL: ret_v4i8:
99
; CHECK: # %bb.0:
10-
; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
10+
; CHECK-NEXT: vsetivli a1, 4, e8,mf4,ta,mu
1111
; CHECK-NEXT: vle8.v v8, (a0)
1212
; CHECK-NEXT: ret
1313
%v = load <4 x i8>, <4 x i8>* %p
@@ -107,7 +107,7 @@ define <16 x i64> @ret_v16i64(<16 x i64>* %p) {
107107
define <8 x i1> @ret_mask_v8i1(<8 x i1>* %p) {
108108
; CHECK-LABEL: ret_mask_v8i1:
109109
; CHECK: # %bb.0:
110-
; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
110+
; CHECK-NEXT: vsetivli a1, 8, e8,mf2,ta,mu
111111
; CHECK-NEXT: vle1.v v0, (a0)
112112
; CHECK-NEXT: ret
113113
%v = load <8 x i1>, <8 x i1>* %p
@@ -490,7 +490,7 @@ define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
490490
define <4 x i8> @ret_v8i8_param_v4i8(<4 x i8> %v) {
491491
; CHECK-LABEL: ret_v8i8_param_v4i8:
492492
; CHECK: # %bb.0:
493-
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
493+
; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
494494
; CHECK-NEXT: vadd.vi v8, v8, 2
495495
; CHECK-NEXT: ret
496496
%r = add <4 x i8> %v, <i8 2, i8 2, i8 2, i8 2>
@@ -500,7 +500,7 @@ define <4 x i8> @ret_v8i8_param_v4i8(<4 x i8> %v) {
500500
define <4 x i8> @ret_v4i8_param_v4i8_v4i8(<4 x i8> %v, <4 x i8> %w) {
501501
; CHECK-LABEL: ret_v4i8_param_v4i8_v4i8:
502502
; CHECK: # %bb.0:
503-
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
503+
; CHECK-NEXT: vsetivli a0, 4, e8,mf4,ta,mu
504504
; CHECK-NEXT: vadd.vv v8, v8, v9
505505
; CHECK-NEXT: ret
506506
%r = add <4 x i8> %v, %w
@@ -539,7 +539,7 @@ define <4 x i64> @ret_v4i64_param_v4i64_v4i64(<4 x i64> %v, <4 x i64> %w) {
539539
define <8 x i1> @ret_v8i1_param_v8i1_v8i1(<8 x i1> %v, <8 x i1> %w) {
540540
; CHECK-LABEL: ret_v8i1_param_v8i1_v8i1:
541541
; CHECK: # %bb.0:
542-
; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
542+
; CHECK-NEXT: vsetivli a0, 8, e8,mf2,ta,mu
543543
; CHECK-NEXT: vmxor.mm v0, v0, v8
544544
; CHECK-NEXT: ret
545545
%r = xor <8 x i1> %v, %w
@@ -1022,7 +1022,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
10221022
; LMULMAX8-NEXT: addi s0, sp, 256
10231023
; LMULMAX8-NEXT: .cfi_def_cfa s0, 0
10241024
; LMULMAX8-NEXT: andi sp, sp, -128
1025-
; LMULMAX8-NEXT: vsetivli a2, 2, e32,m1,ta,mu
1025+
; LMULMAX8-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
10261026
; LMULMAX8-NEXT: vle32.v v8, (a0)
10271027
; LMULMAX8-NEXT: addi a0, zero, 32
10281028
; LMULMAX8-NEXT: vsetvli a0, a0, e32,m8,ta,mu
@@ -1051,7 +1051,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
10511051
; LMULMAX4-NEXT: addi s0, sp, 256
10521052
; LMULMAX4-NEXT: .cfi_def_cfa s0, 0
10531053
; LMULMAX4-NEXT: andi sp, sp, -128
1054-
; LMULMAX4-NEXT: vsetivli a2, 2, e32,m1,ta,mu
1054+
; LMULMAX4-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
10551055
; LMULMAX4-NEXT: vle32.v v8, (a0)
10561056
; LMULMAX4-NEXT: vsetivli a0, 16, e32,m4,ta,mu
10571057
; LMULMAX4-NEXT: vle32.v v16, (a1)
@@ -1083,7 +1083,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
10831083
; LMULMAX2-NEXT: addi s0, sp, 128
10841084
; LMULMAX2-NEXT: .cfi_def_cfa s0, 0
10851085
; LMULMAX2-NEXT: andi sp, sp, -128
1086-
; LMULMAX2-NEXT: vsetivli a2, 2, e32,m1,ta,mu
1086+
; LMULMAX2-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
10871087
; LMULMAX2-NEXT: vle32.v v8, (a0)
10881088
; LMULMAX2-NEXT: vsetivli a0, 8, e32,m2,ta,mu
10891089
; LMULMAX2-NEXT: vle32.v v14, (a1)
@@ -1122,7 +1122,7 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
11221122
; LMULMAX1-NEXT: addi s0, sp, 128
11231123
; LMULMAX1-NEXT: .cfi_def_cfa s0, 0
11241124
; LMULMAX1-NEXT: andi sp, sp, -128
1125-
; LMULMAX1-NEXT: vsetivli a2, 2, e32,m1,ta,mu
1125+
; LMULMAX1-NEXT: vsetivli a2, 2, e32,mf2,ta,mu
11261126
; LMULMAX1-NEXT: vle32.v v8, (a0)
11271127
; LMULMAX1-NEXT: vsetivli a0, 4, e32,m1,ta,mu
11281128
; LMULMAX1-NEXT: vle32.v v13, (a1)

0 commit comments

Comments
 (0)