
Commit 5972819

[RISCV] Disable fixed length vectors with Zve32* without Zvl64b. (#102405)
Fixed length vectors use scalable vector containers. With Zve32* and without Zvl64b, vscale would be 0.5, because RVVBitsPerBlock is 64. To support this correctly we would need to lower RVVBitsPerBlock to 32 and change our type mapping, but RVVBitsPerBlock must always be >= ELEN, so we would need two different mappings depending on ELEN. That is a non-trivial amount of work, so disable fixed length vectors without Zvl64b for now. We had almost no tests for Zve32x without Zvl64b, which is probably why we never realized it was broken. Fixes #102352.
1 parent 73aa4e4 commit 5972819
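
Background on the arithmetic: vscale is VLEN divided by RVVBitsPerBlock, so a configuration that only guarantees VLEN >= 32 (Zvl32b, which is all that Zve32* implies) yields a minimum vscale of 0.5, while Zvl64b yields 1. The standalone C++ sketch below only illustrates that arithmetic; the constant mirrors RISCV::RVVBitsPerBlock, and the rest is illustrative rather than LLVM API.

#include <cstdio>

// Illustrative sketch only: RVVBitsPerBlock is 64 in the RISC-V backend, and
// Zvl32b/Zvl64b/Zvl128b guarantee a minimum VLEN of 32/64/128 bits.
constexpr unsigned RVVBitsPerBlock = 64;

int main() {
  for (unsigned MinVLen : {32u, 64u, 128u}) {
    // vscale = VLEN / RVVBitsPerBlock; fixed-length lowering needs vscale >= 1.
    double MinVScale = static_cast<double>(MinVLen) / RVVBitsPerBlock;
    std::printf("Zvl%ub: minimum vscale = %.1f -> fixed-length RVV %s\n",
                MinVLen, MinVScale,
                MinVLen >= RVVBitsPerBlock ? "supported" : "disabled");
  }
  return 0;
}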

File tree

4 files changed: +89, -106 lines changed

llvm/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
@@ -108,6 +108,9 @@ Changes to the RISC-V Backend
   fill value) rather than NOPs.
 * Added Syntacore SCR4 CPUs: ``-mcpu=syntacore-scr4-rv32/64``
 * ``-mcpu=sifive-p470`` was added.
+* Fixed length vector support using RVV instructions now requires VLEN>=64. This
+  means Zve32x and Zve32f will also require Zvl64b. The prior support was
+  largely untested.
 
 Changes to the WebAssembly Backend
 ----------------------------------

llvm/lib/Target/RISCV/RISCVSubtarget.cpp

Lines changed: 2 additions & 1 deletion
@@ -179,7 +179,8 @@ unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
 }
 
 bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
-  return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
+  return hasVInstructions() &&
+         getMinRVVVectorSizeInBits() >= RISCV::RVVBitsPerBlock;
 }
 
 bool RISCVSubtarget::enableSubRegLiveness() const { return true; }
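
As a usage illustration of the predicate change (a minimal sketch, not the in-tree code): the old check accepted any non-zero guaranteed vector length, so Zve32x alone (minimum VLEN 32) enabled fixed-length lowering, while the new check requires the guarantee to cover a full RVV block. The helper names below are hypothetical and assume the minimum RVV vector size tracks the Zvl*b guarantee.

#include <cstdio>

constexpr unsigned RVVBitsPerBlock = 64; // mirrors RISCV::RVVBitsPerBlock

// Old predicate (sketch): any non-zero guaranteed VLEN enabled fixed-length lowering.
bool oldUseRVVForFixedLengthVectors(bool HasVInstructions, unsigned MinVLen) {
  return HasVInstructions && MinVLen != 0;
}

// New predicate (sketch): the guaranteed VLEN must cover one full RVV block.
bool newUseRVVForFixedLengthVectors(bool HasVInstructions, unsigned MinVLen) {
  return HasVInstructions && MinVLen >= RVVBitsPerBlock;
}

int main() {
  // Zve32x alone guarantees VLEN >= 32; adding Zvl64b raises that to VLEN >= 64.
  for (unsigned MinVLen : {32u, 64u}) {
    std::printf("MinVLen=%u: old=%d new=%d\n", MinVLen,
                oldUseRVVForFixedLengthVectors(true, MinVLen),
                newUseRVVForFixedLengthVectors(true, MinVLen));
  }
  return 0;
}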

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll

Lines changed: 78 additions & 78 deletions
@@ -3,8 +3,8 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-V
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-V
-; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
-; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvfh,+zvkb,+zvl64b -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvfh,+zvkb,+zvl64b -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
 
 define <8 x i1> @shuffle_v8i1_as_i8_1(<8 x i1> %v) {
 ; CHECK-LABEL: shuffle_v8i1_as_i8_1:
@@ -191,7 +191,7 @@ define <8 x i8> @shuffle_v8i8_as_i16(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i16:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT: vrev8.v v8, v8
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -215,7 +215,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_8(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_8:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 8
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
@@ -239,7 +239,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_16(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_16:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
@@ -263,7 +263,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_24(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_24:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
 ; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 24
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
@@ -287,10 +287,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_8(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_8:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 7
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 1
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 7
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
   ret <8 x i8> %shuffle
@@ -313,10 +313,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_16(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_16:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 6
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 2
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 6
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
   ret <8 x i8> %shuffle
@@ -339,10 +339,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_24(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_24:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 5
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 3
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 5
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
   ret <8 x i8> %shuffle
@@ -365,10 +365,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_32(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_32:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 4
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 4
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 4
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 4
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i8> %shuffle
@@ -391,10 +391,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_40(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_40:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 5
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 5
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 3
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
   ret <8 x i8> %shuffle
@@ -417,10 +417,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_48(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_48:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 6
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 6
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 2
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
   ret <8 x i8> %shuffle
@@ -443,10 +443,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_56(<8 x i8> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_56:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
-; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 7
-; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 7
+; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 1
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
   ret <8 x i8> %shuffle
@@ -469,7 +469,7 @@ define <8 x i16> @shuffle_v8i16_as_i32(<8 x i16> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i32:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -512,11 +512,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_16(<8 x i16> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI19_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI19_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   ret <8 x i16> %shuffle
@@ -558,11 +558,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_32(<8 x i16> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI20_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI20_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
   ret <8 x i16> %shuffle
@@ -604,11 +604,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_48(<8 x i16> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI21_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI21_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   ret <8 x i16> %shuffle
@@ -650,12 +650,12 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI22_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI22_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v16, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v24, v16
-; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v16, v8, v24
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v16
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
+; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x i32> %shuffle
@@ -678,7 +678,7 @@ define <8 x half> @shuffle_v8f16_as_i32(<8 x half> %v) {
 ;
 ; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i32:
 ; ZVKB-ZVE32X: # %bb.0:
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m2, ta, ma
 ; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
@@ -721,11 +721,11 @@ define <8 x half> @shuffle_v8f16_as_i64_16(<8 x half> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI24_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI24_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   ret <8 x half> %shuffle
@@ -767,11 +767,11 @@ define <8 x half> @shuffle_v8f16_as_i64_32(<8 x half> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI25_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI25_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
   ret <8 x half> %shuffle
@@ -813,11 +813,11 @@ define <8 x half> @shuffle_v8f16_as_i64_48(<8 x half> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI26_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI26_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
-; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
+; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   ret <8 x half> %shuffle
@@ -859,12 +859,12 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) {
 ; ZVKB-ZVE32X: # %bb.0:
 ; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI27_0)
 ; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI27_0)
-; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
-; ZVKB-ZVE32X-NEXT: vle8.v v16, (a0)
-; ZVKB-ZVE32X-NEXT: vsext.vf2 v24, v16
-; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m8, ta, ma
-; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v16, v8, v24
-; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v16
+; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
+; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
+; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
+; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16
+; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
 ; ZVKB-ZVE32X-NEXT: ret
   %shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   ret <8 x float> %shuffle

llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll

Lines changed: 6 additions & 27 deletions
@@ -4,42 +4,21 @@
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64"
 
-; We can't use scalable vectorization for Zvl32b due to RVVBitsPerBlock being
-; 64. Since our vscale value is vlen/RVVBitsPerBlock this makes vscale 0.
-; Make sure we fall back to fixed vectorization instead.
+; We can't vectorize with Zvl32b due to RVVBitsPerBlock being 64. Since our
+; vscale value is vlen/RVVBitsPerBlock this makes vscale 0.
 define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) {
 ; CHECK-LABEL: @vector_add_i16(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[V:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> [[TMP0]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> poison)
-; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP1]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
-; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: br label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1020, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
 ; CHECK-NEXT: [[ELEM:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V]]
+; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V:%.*]]
 ; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX]], align 2
 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
 ; CHECK-NEXT: ret void
 ;
