[RISCV] Disable combineToVCPOP for illegal scalable vector types. #140195
This transform creates target-specific instructions which must have legal types. We were checking this for fixed vectors, but not scalable vectors. This caused a crash with <vscale x 1 x i1>, which isn't legal for Zve32x.

@llvm/pr-subscribers-backend-risc-v

Author: Craig Topper (topperc)

Patch is 32.83 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/140195.diff

2 Files Affected: llvm/lib/Target/RISCV/RISCVISelLowering.cpp and llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
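For reference, the crash reproduces with the scalable-vector case this patch adds to the test file; this is a minimal sketch, and the llc invocation simply mirrors the new ZVE RUN line in the diff below (the file name reduce.ll is just a placeholder):

; Reproducer, reduced from the test added below. Assumed invocation:
;   llc -mtriple=riscv64 -mattr=+zve32x,+zvl64b,+zbb < reduce.ll
; <vscale x 1 x i1> is not a legal type under Zve32x, so the combine must bail out
; instead of forming a VCPOP node with an illegal type.
declare i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32>)

define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
entry:
  %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
  %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
  ret i32 %b
}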
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c53550ea3b23b..045b346abf341 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18798,6 +18798,10 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
return SDValue();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(SrcMVT))
+ return SDValue();
+
// Check that destination type is large enough to hold result without
// overflow.
if (Opc == ISD::VECREDUCE_ADD) {
@@ -18814,9 +18818,6 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
MVT ContainerVT = SrcMVT;
if (SrcMVT.isFixedLengthVector()) {
- if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
- return SDValue();
-
ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
index ac1d63311fd1e..582871e05801d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve32x,+zvl64b,+zbb | FileCheck %s --check-prefixes=ZVE
define i32 @test_v2i1(<2 x i1> %x) {
; CHECK-LABEL: test_v2i1:
@@ -8,6 +9,12 @@ define i32 @test_v2i1(<2 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v2i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <2 x i1> %x to <2 x i32>
%b = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
ret i32 %b
@@ -19,6 +26,12 @@ define i32 @test_v4i1(<4 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v4i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <4 x i1> %x to <4 x i32>
%b = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
ret i32 %b
@@ -30,6 +43,12 @@ define i32 @test_v8i1(<8 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v8i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 8, e8, m1, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <8 x i1> %x to <8 x i32>
%b = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
ret i32 %b
@@ -41,6 +60,12 @@ define i32 @test_v16i1(<16 x i1> %x) {
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v16i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: vsetivli zero, 16, e8, m2, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <16 x i1> %x to <16 x i32>
%b = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
ret i32 %b
@@ -53,6 +78,13 @@ define i32 @test_v32i1(<32 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v32i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: li a0, 32
+; ZVE-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <32 x i1> %x to <32 x i32>
%b = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a)
ret i32 %b
@@ -65,6 +97,13 @@ define i32 @test_v64i1(<64 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v64i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: li a0, 64
+; ZVE-NEXT: vsetvli zero, a0, e8, m8, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
%a = zext <64 x i1> %x to <64 x i32>
%b = call i32 @llvm.vector.reduce.add.v64i32(<64 x i32> %a)
ret i32 %b
@@ -77,6 +116,93 @@ define i32 @test_v128i1(<128 x i1> %x) {
; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v128i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: addi sp, sp, -16
+; ZVE-NEXT: .cfi_def_cfa_offset 16
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: sub sp, sp, a0
+; ZVE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vmv1r.v v7, v8
+; ZVE-NEXT: vmv1r.v v6, v0
+; ZVE-NEXT: vslidedown.vi v5, v8, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v5, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v4, v6, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v4, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v7, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v6, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v5
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v4
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: vmv1r.v v0, v7
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v6
+; ZVE-NEXT: vmerge.vim v16, v16, 1, v0
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v24, v0, v24
+; ZVE-NEXT: vadd.vv v8, v16, v8
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: vmv.s.x v16, zero
+; ZVE-NEXT: vredsum.vs v8, v8, v16
+; ZVE-NEXT: vmv.x.s a0, v8
+; ZVE-NEXT: csrr a1, vlenb
+; ZVE-NEXT: slli a1, a1, 4
+; ZVE-NEXT: add sp, sp, a1
+; ZVE-NEXT: .cfi_def_cfa sp, 16
+; ZVE-NEXT: addi sp, sp, 16
+; ZVE-NEXT: .cfi_def_cfa_offset 0
+; ZVE-NEXT: ret
%a = zext <128 x i1> %x to <128 x i32>
%b = call i32 @llvm.vector.reduce.add.v128i32(<128 x i32> %a)
ret i32 %b
@@ -168,17 +294,331 @@ define i32 @test_v256i1(<256 x i1> %x) {
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_v256i1:
+; ZVE: # %bb.0:
+; ZVE-NEXT: addi sp, sp, -16
+; ZVE-NEXT: .cfi_def_cfa_offset 16
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a1, a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: sub sp, sp, a0
+; ZVE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vmv1r.v v6, v10
+; ZVE-NEXT: vmv1r.v v4, v9
+; ZVE-NEXT: vmv1r.v v7, v8
+; ZVE-NEXT: vmv1r.v v5, v0
+; ZVE-NEXT: vslidedown.vi v3, v9, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmv.v.i v16, 0
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v3, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v2, v5, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v2, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v1, v6, 4
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v1, 2
+; ZVE-NEXT: vsetivli zero, 4, e8, m1, ta, ma
+; ZVE-NEXT: vslidedown.vi v24, v7, 4
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs1r.v v24, (a0) # vscale x 8-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v24, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v4, 2
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v5, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v6, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vsetivli zero, 2, e8, mf2, ta, ma
+; ZVE-NEXT: vslidedown.vi v0, v7, 2
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v3
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v2
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v1
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl1r.v v0, (a0) # vscale x 8-byte Folded Reload
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v4
+; ZVE-NEXT: vmerge.vim v8, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v5
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vadd.vv v8, v24, v8
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZVE-NEXT: vmv1r.v v0, v6
+; ZVE-NEXT: vmerge.vim v24, v16, 1, v0
+; ZVE-NEXT: vmv1r.v v0, v7
+; ZVE-NEXT: vmerge.vim v16, v16, 1, v0
+; ZVE-NEXT: vadd.vv v16, v16, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 2
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 5
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v24, v24, v0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v0, v0, v8
+; ZVE-NEXT: vadd.vv v24, v0, v24
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 3
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: slli a0, a0, 4
+; ZVE-NEXT: mv a1, a0
+; ZVE-NEXT: slli a0, a0, 1
+; ZVE-NEXT: add a0, a0, a1
+; ZVE-NEXT: add a0, sp, a0
+; ZVE-NEXT: addi a0, a0, 16
+; ZVE-NEXT: vl8r.v v0, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v0, v8, v0
+; ZVE-NEXT: addi a0, sp, 16
+; ZVE-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZVE-NEXT: vadd.vv v8, v8, v16
+; ZVE-NEXT: vadd.vv v8, v8, v0
+; ZVE-NEXT: vadd.vv v8, v8, v24
+; ZVE-NEXT: vmv.s.x v16, zero
+; ZVE-NEXT: vredsum.vs v8, v8, v16
+; ZVE-NEXT: vmv.x.s a0, v8
+; ZVE-NEXT: csrr a1, vlenb
+; ZVE-NEXT: slli a1, a1, 3
+; ZVE-NEXT: mv a2, a1
+; ZVE-NEXT: slli a1, a1, 1
+; ZVE-NEXT: add a2, a2, a1
+; ZVE-NEXT: slli a1, a1, 1
+; ZVE-NEXT: add a1, a1, a2
+; ZVE-NEXT: add sp, sp, a1
+; ZVE-NEXT: .cfi_def_cfa sp, 16
+; ZVE-NEXT: addi sp, sp, 16
+; ZVE-NEXT: .cfi_def_cfa_offset 0
+; ZVE-NEXT: ret
%a = zext <256 x i1> %x to <256 x i32>
%b = call i32 @llvm.vector.reduce.add.v256i32(<256 x i32> %a)
ret i32 %b
}
+; FIXME: Optimize this case with Zve32x. We have to use mf4 and set the VL to
+; VLEN/64.
+define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
+; CHECK-LABEL: test_nxv1i1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vcpop.m a0, v0
+; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_nxv1i1:
+; ZVE: # %bb.0: # %entry
+; ZVE-NEXT: vsetvli a0, zero, e32, m1, ta, ma
+; ZVE-NEXT: vmv.v.i v8, 0
+; ZVE-NEXT: csrr a0, vlenb
+; ZVE-NEXT: srli a0, a0, 3
+; ZVE-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT: vmerge.vim v8, v8, 1, v0
+; ZVE-NEXT: vsetivli zero, 1, e32, m1, ta, ma
+; ZVE-NEXT: vmv.s.x v9, zero
+; ZVE-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT: vredsum.vs v9, v8, v9
+; ZVE-NEXT: vmv.x.s a0, v9
+; ZVE-NEXT: ret
+entry:
+ %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
+ %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
+ ret i32 %b
+}
+
define i32 @test_nxv2i1(<vscale x 2 x i1> %x) {
; CHECK-LABEL: test_nxv2i1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
; CHECK-NEXT: vcpop.m a0, v0
; CHECK-NEXT: ret
+;
+; ZVE-LABEL: test_nxv2i1:
+; ZVE: # %bb.0: # %entry
+; ZVE-NEXT: vsetvli a0, zero, e8, mf4, ta, ma
+; ZVE-NEXT: vcpop.m a0, v0
+; ZVE-NEXT: ret
entry:
%a = zext <vscale x 2 x i1> %x to <vscale x 2 x i32>
%b = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x ...
[truncated]
@@ -18814,9 +18818,6 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,

  MVT ContainerVT = SrcMVT;
  if (SrcMVT.isFixedLengthVector()) {
    if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
This should be equivalent to the isTypeLegal check, so I removed it.
@@ -1,13 +1,20 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV64
; RUN: llc < %s -mtriple=riscv64 -mattr=+zve32x,+zvl64b,+zbb | FileCheck %s --check-prefixes=ZVE
Add suitable prefixes to remove duplications?
LGTM