Skip to content

[RISCV] Disable fixed length vectors with Zve32* without Zvl64b. #102405

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ Changes to the RISC-V Backend
fill value) rather than NOPs.
* Added Syntacore SCR4 CPUs: ``-mcpu=syntacore-scr4-rv32/64``
* ``-mcpu=sifive-p470`` was added.
* Fixed-length vector support using RVV instructions now requires VLEN>=64. This
means Zve32x and Zve32f must be combined with Zvl64b to use fixed-length
vectors. The prior support was largely untested.

Changes to the WebAssembly Backend
----------------------------------
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
}

bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
return hasVInstructions() && getMinRVVVectorSizeInBits() != 0;
return hasVInstructions() &&
getMinRVVVectorSizeInBits() >= RISCV::RVVBitsPerBlock;
}

bool RISCVSubtarget::enableSubRegLiveness() const { return true; }
Expand Down
156 changes: 78 additions & 78 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-V
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-V
; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvfh,+zvkb -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvfh,+zvkb,+zvl64b -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvfh,+zvkb,+zvl64b -verify-machineinstrs < %s | FileCheck %s -check-prefixes=ZVKB-ZVE32X

define <8 x i1> @shuffle_v8i1_as_i8_1(<8 x i1> %v) {
; CHECK-LABEL: shuffle_v8i1_as_i8_1:
Expand Down Expand Up @@ -191,7 +191,7 @@ define <8 x i8> @shuffle_v8i8_as_i16(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i16:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e16, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vrev8.v v8, v8
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
Expand All @@ -215,7 +215,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_8(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_8:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 8
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
Expand All @@ -239,7 +239,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_16(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_16:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
Expand All @@ -263,7 +263,7 @@ define <8 x i8> @shuffle_v8i8_as_i32_24(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i32_24:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 2, e32, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 24
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
Expand All @@ -287,10 +287,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_8(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_8:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 1
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 7
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 1
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 7
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
ret <8 x i8> %shuffle
Expand All @@ -313,10 +313,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_16(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_16:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 2
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 6
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 2
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 6
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
ret <8 x i8> %shuffle
Expand All @@ -339,10 +339,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_24(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_24:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 3
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 5
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 3
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 5
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2>
ret <8 x i8> %shuffle
Expand All @@ -365,10 +365,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_32(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_32:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 4
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 4
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 4
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 4
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
ret <8 x i8> %shuffle
Expand All @@ -391,10 +391,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_40(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_40:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 5
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 3
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 5
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 3
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
ret <8 x i8> %shuffle
Expand All @@ -417,10 +417,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_48(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_48:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 6
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 2
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 6
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 2
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <8 x i8> %shuffle
Expand All @@ -443,10 +443,10 @@ define <8 x i8> @shuffle_v8i8_as_i64_56(<8 x i8> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i8_as_i64_56:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v10, v8, 7
; ZVKB-ZVE32X-NEXT: vslideup.vi v10, v8, 1
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e8, m1, ta, ma
; ZVKB-ZVE32X-NEXT: vslidedown.vi v9, v8, 7
; ZVKB-ZVE32X-NEXT: vslideup.vi v9, v8, 1
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v9
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
ret <8 x i8> %shuffle
Expand All @@ -469,7 +469,7 @@ define <8 x i16> @shuffle_v8i16_as_i32(<8 x i16> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8i16_as_i32:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
Expand Down Expand Up @@ -512,11 +512,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_16(<8 x i16> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI19_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI19_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
ret <8 x i16> %shuffle
Expand Down Expand Up @@ -558,11 +558,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_32(<8 x i16> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI20_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI20_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
Expand Down Expand Up @@ -604,11 +604,11 @@ define <8 x i16> @shuffle_v8i16_as_i64_48(<8 x i16> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI21_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI21_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
ret <8 x i16> %shuffle
Expand Down Expand Up @@ -650,12 +650,12 @@ define <8 x i32> @shuffle_v8i32_as_i64(<8 x i32> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI22_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI22_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v16, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v24, v16
; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v16, v8, v24
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v16
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x i32> %shuffle
Expand All @@ -678,7 +678,7 @@ define <8 x half> @shuffle_v8f16_as_i32(<8 x half> %v) {
;
; ZVKB-ZVE32X-LABEL: shuffle_v8f16_as_i32:
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vsetivli zero, 4, e32, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vror.vi v8, v8, 16
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
Expand Down Expand Up @@ -721,11 +721,11 @@ define <8 x half> @shuffle_v8f16_as_i64_16(<8 x half> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI24_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI24_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
ret <8 x half> %shuffle
Expand Down Expand Up @@ -767,11 +767,11 @@ define <8 x half> @shuffle_v8f16_as_i64_32(<8 x half> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI25_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI25_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
ret <8 x half> %shuffle
Expand Down Expand Up @@ -813,11 +813,11 @@ define <8 x half> @shuffle_v8f16_as_i64_48(<8 x half> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI26_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI26_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vrgather.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v10, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v12, v10
; ZVKB-ZVE32X-NEXT: vrgather.vv v10, v8, v12
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v10
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x half> %v, <8 x half> poison, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
ret <8 x half> %shuffle
Expand Down Expand Up @@ -859,12 +859,12 @@ define <8 x float> @shuffle_v8f32_as_i64(<8 x float> %v) {
; ZVKB-ZVE32X: # %bb.0:
; ZVKB-ZVE32X-NEXT: lui a0, %hi(.LCPI27_0)
; ZVKB-ZVE32X-NEXT: addi a0, a0, %lo(.LCPI27_0)
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v16, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v24, v16
; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v16, v8, v24
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v16
; ZVKB-ZVE32X-NEXT: vsetivli zero, 8, e16, m2, ta, ma
; ZVKB-ZVE32X-NEXT: vle8.v v12, (a0)
; ZVKB-ZVE32X-NEXT: vsext.vf2 v16, v12
; ZVKB-ZVE32X-NEXT: vsetvli zero, zero, e32, m4, ta, ma
; ZVKB-ZVE32X-NEXT: vrgatherei16.vv v12, v8, v16
; ZVKB-ZVE32X-NEXT: vmv.v.v v8, v12
; ZVKB-ZVE32X-NEXT: ret
%shuffle = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
ret <8 x float> %shuffle
Expand Down
33 changes: 6 additions & 27 deletions llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,21 @@
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

; We can't use scalable vectorization for Zvl32b due to RVVBitsPerBlock being
; 64. Since our vscale value is vlen/RVVBitsPerBlock this makes vscale 0.
; Make sure we fall back to fixed vectorization instead.
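; We can't vectorize with Zvl32b because RVVBitsPerBlock is 64. Our vscale
; value is VLEN/RVVBitsPerBlock, which makes vscale 0, so scalable
; vectorization is unavailable. Fixed-length vectorization with RVV also
; requires VLEN>=64, so the loop must remain scalar.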
define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) {
; CHECK-LABEL: @vector_add_i16(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[V:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <4 x i64> [[VEC_IND]]
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> [[TMP0]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> poison)
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[WIDE_MASKED_GATHER]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP1]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1020, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]]
; CHECK-NEXT: [[ELEM:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V]]
; CHECK-NEXT: [[ADD:%.*]] = add i16 [[ELEM]], [[V:%.*]]
; CHECK-NEXT: store i16 [[ADD]], ptr [[ARRAYIDX]], align 2
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
Expand Down
Loading