[RISCV] Disable combineToVCPOP for illegal scalable vector types. (#140195)

topperc · web-flow · commit 976b00654a37 · 2025-05-16T11:24:26.000-07:00
This transform creates target-specific instructions, which must have
legal types. We were checking this for fixed vectors, but not for scalable
vectors. This caused a crash with <vscale x 1 x i1>, which isn't legal
for Zve32x.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19031,6 +19031,10 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
   if (!SrcMVT.isVector() || SrcMVT.getVectorElementType() != MVT::i1)
     return SDValue();
 
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(SrcMVT))
+    return SDValue();
+
   // Check that destination type is large enough to hold result without
   // overflow.
   if (Opc == ISD::VECREDUCE_ADD) {
@@ -19047,9 +19051,6 @@ static SDValue combineToVCPOP(SDNode *N, SelectionDAG &DAG,
 
   MVT ContainerVT = SrcMVT;
   if (SrcMVT.isFixedLengthVector()) {
-    if (!useRVVForFixedLengthVectorVT(SrcMVT, Subtarget))
-      return SDValue();
-
     ContainerVT = getContainerForFixedLengthVector(DAG, SrcMVT, Subtarget);
     Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll
@@ -1,13 +1,20 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zbb | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zve32x,+zvl128b,+zbb | FileCheck %s --check-prefixes=CHECK,ZVE
 
 define i32 @test_v2i1(<2 x i1> %x) {
-; CHECK-LABEL: test_v2i1:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
+; V-LABEL: test_v2i1:
+; V:       # %bb.0:
+; V-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
+; V-NEXT:    vcpop.m a0, v0
+; V-NEXT:    ret
+;
+; ZVE-LABEL: test_v2i1:
+; ZVE:       # %bb.0:
+; ZVE-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
+; ZVE-NEXT:    vcpop.m a0, v0
+; ZVE-NEXT:    ret
   %a = zext <2 x i1> %x to <2 x i32>
   %b = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
   ret i32 %b
@@ -173,6 +180,35 @@ define i32 @test_v256i1(<256 x i1> %x) {
   ret i32 %b
 }
 
+; FIXME: Optimize this case with Zve32x. We have to use mf4 and set the VL to
+; VLEN/64.
+define i32 @test_nxv1i1(<vscale x 1 x i1> %x) {
+; V-LABEL: test_nxv1i1:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
+; V-NEXT:    vcpop.m a0, v0
+; V-NEXT:    ret
+;
+; ZVE-LABEL: test_nxv1i1:
+; ZVE:       # %bb.0: # %entry
+; ZVE-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; ZVE-NEXT:    vmv.v.i v8, 0
+; ZVE-NEXT:    csrr a0, vlenb
+; ZVE-NEXT:    srli a0, a0, 3
+; ZVE-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT:    vmerge.vim v8, v8, 1, v0
+; ZVE-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
+; ZVE-NEXT:    vmv.s.x v9, zero
+; ZVE-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; ZVE-NEXT:    vredsum.vs v9, v8, v9
+; ZVE-NEXT:    vmv.x.s a0, v9
+; ZVE-NEXT:    ret
+entry:
+  %a = zext <vscale x 1 x i1> %x to <vscale x 1 x i32>
+  %b = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> %a)
+  ret i32 %b
+}
+
 define i32 @test_nxv2i1(<vscale x 2 x i1> %x) {
 ; CHECK-LABEL: test_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
@@ -520,7 +556,3 @@ entry:
   %b = call i16 @llvm.vector.reduce.add.nxv64i16(<vscale x 64 x i16> %a)
   ret i16 %b
 }
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}