[RISCV][GISEL] instruction-select for G_SPLAT_VECTOR #111193
SelectionDAG lowers ISD::SPLAT_VECTOR in a pre-instruction-select step too. By taking this approach, we allow the generic combiner to operate on G_SPLAT_VECTOR instead of RISCV Generic Opcodes.
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

Changes

SelectionDAG lowers ISD::SPLAT_VECTOR in a pre-instruction-select step too. By taking this approach, we allow the generic combiner to operate on G_SPLAT_VECTOR instead of RISCV Generic Opcodes.

Patch is 49.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/111193.diff

4 Files Affected:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index cfe8644b892298..eb3152ad768890 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -568,6 +568,18 @@ bool RISCVInstructionSelector::select(MachineInstr &MI) {
return true;
}
+ // FIXME: We create a IMPLICIT_DEF and a G_CONSTANT in preISelLower when
+ // we encounter a G_SPLAT_VECTOR. We cannot select the G_CONSTANT until after
+ // the MI is lowered, since renderVLOp needs to see the G_CONSTANT. It would
+ // be nice if the InstructionSelector selected these instructions without
+ // needing to call select on them explicitly.
+ if (Opc == RISCV::G_VMV_V_X_VL || Opc == RISCV::G_VFMV_V_F_VL) {
+ MachineInstr *Passthru = MRI->getVRegDef(MI.getOperand(1).getReg());
+ MachineInstr *VL = MRI->getVRegDef(MI.getOperand(3).getReg());
+ if (selectImpl(MI, *CoverageInfo))
+ return select(*Passthru) && select(*VL);
+ }
+
if (selectImpl(MI, *CoverageInfo))
return true;
@@ -800,6 +812,33 @@ void RISCVInstructionSelector::preISelLower(MachineInstr &MI,
replacePtrWithInt(MI.getOperand(1), MIB);
MI.setDesc(TII.get(TargetOpcode::G_AND));
MRI->setType(DstReg, sXLen);
+ break;
+ }
+ case TargetOpcode::G_SPLAT_VECTOR: {
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
+ Register Scalar = MI.getOperand(1).getReg();
+ bool IsGPRSplat = isRegInGprb(Scalar);
+ const LLT sXLen = LLT::scalar(STI.getXLen());
+ if (IsGPRSplat && TypeSize::isKnownLT(MRI->getType(Scalar).getSizeInBits(),
+ sXLen.getSizeInBits()))
+ Scalar = MIB.buildAnyExt(sXLen, Scalar).getReg(0);
+
+ // Convert MI in place, since select function is trying to select this
+ // instruction.
+ unsigned Opc = IsGPRSplat ? RISCV::G_VMV_V_X_VL : RISCV::G_VFMV_V_F_VL;
+ MI.setDesc(TII.get(Opc));
+ MI.removeOperand(1);
+ LLT VecTy = MRI->getType(MI.getOperand(0).getReg());
+ auto Passthru = MIB.buildUndef(VecTy);
+ auto VLMax = MIB.buildConstant(sXLen, -1);
+ MRI->setRegBank(Passthru.getReg(0), RBI.getRegBank(RISCV::VRBRegBankID));
+ MRI->setRegBank(VLMax.getReg(0), RBI.getRegBank(RISCV::GPRBRegBankID));
+ MachineInstrBuilder(*MI.getMF(), &MI)
+ .addUse(Passthru.getReg(0))
+ .addUse(Scalar)
+ .addUse(VLMax.getReg(0));
+ break;
}
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index f6bf74c565ab38..b26751a3c8e8e5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -49,6 +49,22 @@ def G_VMSET_VL : RISCVGenericInstruction {
}
def : GINodeEquiv<G_VMSET_VL, riscv_vmset_vl>;
+// Pseudo equivalent to a RISCVISD::VMV_V_X_VL
+def G_VMV_V_X_VL : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VMV_V_X_VL, riscv_vmv_v_x_vl>;
+
+// Pseudo equivalent to a RISCVISD::VFMV_V_F_VL
+def G_VFMV_V_F_VL : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VFMV_V_F_VL, riscv_vfmv_v_f_vl>;
+
// Pseudo equivalent to a RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL. There is no
// record to mark as equivalent to using GINodeEquiv because it gets lowered
// before instruction selection.
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/splatvector-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/splatvector-rv32.mir
new file mode 100644
index 00000000000000..4066c4c36a8d53
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/splatvector-rv32.mir
@@ -0,0 +1,600 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v,+m -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: splat_zero_nxv1i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv1i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF8_:%[0-9]+]]:vr = PseudoVMV_V_X_MF8 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF8_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 1 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 1 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv2i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv2i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF4_:%[0-9]+]]:vr = PseudoVMV_V_X_MF4 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 2 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 2 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv4i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv4i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF2_:%[0-9]+]]:vr = PseudoVMV_V_X_MF2 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 4 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 4 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv8i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv8i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M1_:%[0-9]+]]:vr = PseudoVMV_V_X_M1 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_M1_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 8 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 8 x s8>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv16i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv16i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M2_:%[0-9]+]]:vrm2 = PseudoVMV_V_X_M2 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m2 = COPY [[PseudoVMV_V_X_M2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 16 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8m2 = COPY %0(<vscale x 16 x s8>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splat_zero_nxv32i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv32i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M4_:%[0-9]+]]:vrm4 = PseudoVMV_V_X_M4 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m4 = COPY [[PseudoVMV_V_X_M4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 32 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8m4 = COPY %0(<vscale x 32 x s8>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splat_zero_nxv64i8
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv64i8
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M8_:%[0-9]+]]:vrm8 = PseudoVMV_V_X_M8 [[DEF]], [[COPY]], -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVMV_V_X_M8_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 64 x s8>) = G_SPLAT_VECTOR %3(s32)
+ $v8m8 = COPY %0(<vscale x 64 x s8>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: splat_zero_nxv1i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv1i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF4_:%[0-9]+]]:vr = PseudoVMV_V_X_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 1 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 1 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv2i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv2i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF2_:%[0-9]+]]:vr = PseudoVMV_V_X_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 2 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 2 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv4i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv4i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M1_:%[0-9]+]]:vr = PseudoVMV_V_X_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_M1_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 4 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8 = COPY %0(<vscale x 4 x s16>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv8i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv8i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M2_:%[0-9]+]]:vrm2 = PseudoVMV_V_X_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m2 = COPY [[PseudoVMV_V_X_M2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 8 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8m2 = COPY %0(<vscale x 8 x s16>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splat_zero_nxv16i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv16i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M4_:%[0-9]+]]:vrm4 = PseudoVMV_V_X_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m4 = COPY [[PseudoVMV_V_X_M4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 16 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8m4 = COPY %0(<vscale x 16 x s16>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splat_zero_nxv32i16
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv32i16
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M8_:%[0-9]+]]:vrm8 = PseudoVMV_V_X_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVMV_V_X_M8_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 32 x s16>) = G_SPLAT_VECTOR %3(s32)
+ $v8m8 = COPY %0(<vscale x 32 x s16>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: splat_zero_nxv1i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv1i32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF2_:%[0-9]+]]:vr = PseudoVMV_V_X_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 1 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv2i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv2i32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M1_:%[0-9]+]]:vr = PseudoVMV_V_X_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_M1_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 2 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8 = COPY %0(<vscale x 2 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv4i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv4i32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M2_:%[0-9]+]]:vrm2 = PseudoVMV_V_X_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m2 = COPY [[PseudoVMV_V_X_M2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 4 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m2 = COPY %0(<vscale x 4 x s32>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splat_zero_nxv8i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv8i32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M4_:%[0-9]+]]:vrm4 = PseudoVMV_V_X_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m4 = COPY [[PseudoVMV_V_X_M4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 8 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m4 = COPY %0(<vscale x 8 x s32>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splat_zero_nxv16i32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv16i32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_M8_:%[0-9]+]]:vrm8 = PseudoVMV_V_X_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVMV_V_X_M8_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %1:gprb(s32) = G_CONSTANT i32 0
+ %0:vrb(<vscale x 16 x s32>) = G_SPLAT_VECTOR %1(s32)
+ $v8m8 = COPY %0(<vscale x 16 x s32>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: splat_zero_nxv1i64
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv1i64
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[BuildPairF64Pseudo:%[0-9]+]]:fpr64 = BuildPairF64Pseudo [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVFMV_V_FPR64_M1_:%[0-9]+]]:vr = PseudoVFMV_V_FPR64_M1 [[DEF]], [[BuildPairF64Pseudo]], -1, 6 /* e64 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVFMV_V_FPR64_M1_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %2:gprb(s32) = G_CONSTANT i32 0
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %1:fprb(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+ %0:vrb(<vscale x 1 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8 = COPY %0(<vscale x 1 x s64>)
+ PseudoRET implicit $v8
+
+...
+---
+name: splat_zero_nxv2i64
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv2i64
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[BuildPairF64Pseudo:%[0-9]+]]:fpr64 = BuildPairF64Pseudo [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVFMV_V_FPR64_M2_:%[0-9]+]]:vrm2 = PseudoVFMV_V_FPR64_M2 [[DEF]], [[BuildPairF64Pseudo]], -1, 6 /* e64 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m2 = COPY [[PseudoVFMV_V_FPR64_M2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m2
+ %2:gprb(s32) = G_CONSTANT i32 0
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %1:fprb(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+ %0:vrb(<vscale x 2 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m2 = COPY %0(<vscale x 2 x s64>)
+ PseudoRET implicit $v8m2
+
+...
+---
+name: splat_zero_nxv4i64
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv4i64
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[BuildPairF64Pseudo:%[0-9]+]]:fpr64 = BuildPairF64Pseudo [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVFMV_V_FPR64_M4_:%[0-9]+]]:vrm4 = PseudoVFMV_V_FPR64_M4 [[DEF]], [[BuildPairF64Pseudo]], -1, 6 /* e64 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m4 = COPY [[PseudoVFMV_V_FPR64_M4_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m4
+ %2:gprb(s32) = G_CONSTANT i32 0
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %1:fprb(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+ %0:vrb(<vscale x 4 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m4 = COPY %0(<vscale x 4 x s64>)
+ PseudoRET implicit $v8m4
+
+...
+---
+name: splat_zero_nxv8i64
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv8i64
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[BuildPairF64Pseudo:%[0-9]+]]:fpr64 = BuildPairF64Pseudo [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVFMV_V_FPR64_M8_:%[0-9]+]]:vrm8 = PseudoVFMV_V_FPR64_M8 [[DEF]], [[BuildPairF64Pseudo]], -1, 6 /* e64 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8m8 = COPY [[PseudoVFMV_V_FPR64_M8_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8m8
+ %2:gprb(s32) = G_CONSTANT i32 0
+ %3:gprb(s32) = G_CONSTANT i32 0
+ %1:fprb(s64) = G_MERGE_VALUES %2(s32), %3(s32)
+ %0:vrb(<vscale x 8 x s64>) = G_SPLAT_VECTOR %1(s64)
+ $v8m8 = COPY %0(<vscale x 8 x s64>)
+ PseudoRET implicit $v8m8
+
+...
+---
+name: splat_zero_nxv1f32
+legalized: true
+regBankSelected: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: splat_zero_nxv1f32
+ ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: [[FMV_W_X:%[0-9]+]]:fpr32 = FMV_W_X [[COPY]]
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[FMV_W_X]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVMV_V_X_MF2_:%[0-9]+]]:vr = PseudoVMV_V_X_MF2 [[DEF]], [[COPY1]], -1, 5 /* e32 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY [[PseudoVMV_V_X_MF2_]]
+ ; CHECK-NEXT: PseudoRET implicit $v8
+ %1:fprb(s32) = G_FCONSTANT float 0.000000e+00
+ %2:gprb(s32) = COPY %1(s32)
+ %0:vrb(<vscale x 1 x s32>) = G_SPLAT_VECTOR %2(s32)
+ $v8 = COPY %0(<vscale x 1 x s32>)
+ PseudoRET implicit $v8
+
+...
+---
+name: ...
[truncated]
@tobias-stadler @aemerson I am including you here since I think this patch relates back to #108991. Here is a case where lowering from G_SPLAT_VECTOR to G_VMV_V_X_VL or G_VFMV_V_F_VL needs to happen, but the question is where and how.
I was under the impression that we can introduce new generic instructions during instruction selection. It seems like the restriction is that we must call select on them ourselves. Here is a case that highlights the awkwardness of this. It would be really nice for the InstructionSelector to know that new instructions need to be selected after selecting the current instruction.
Need an end-to-end IR test.
const LLT sXLen = LLT::scalar(STI.getXLen());
if (IsGPRSplat && TypeSize::isKnownLT(MRI->getType(Scalar).getSizeInBits(),
                                      sXLen.getSizeInBits()))
  Scalar = MIB.buildAnyExt(sXLen, Scalar).getReg(0);
Why doesn't this G_ANYEXT need special selection like the passthru and vlmax?
I'm fairly confident that we never hit this case. I brought it over from selectiondag, but still looking for a test case, otherwise I will remove it. Maybe we are already doing it in legalization.
I changed this case to an assert, since we don't seem to be generating this case yet.
added.
@@ -67,7 +67,428 @@ define <vscale x 64 x i1> @splat_zero_nxv64i1() {
  ret <vscale x 64 x i1> zeroinitializer
}

define <vscale x 1 x i8> @splat_zero_nxv1i8() {
Add tests splatting a scalar
We are not there yet. We need G_EXTRACT_VECTOR_ELT and G_INSERT_VECTOR_ELT.
I think that means we're missing a combine to delete a nop G_INSERT_VECTOR_ELT + G_EXTRACT_VECTOR_ELT pair?
That's correct.
CC: @jiahanxie353 (we came across this)
Do you mean:
def insert_vector_element_extract_vector_element : GICombineRule<
(defs root:$root),
(match (G_EXTRACT_VECTOR_ELT $elt, $src, $idx),
(G_INSERT_VECTOR_ELT $root, $src, $elt, $idx)),
(apply (GIReplaceReg $root, $src))>;
?
So it should be this combine
def extract_vector_element_not_const : GICombineRule<
(defs root:$root),
(match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx),
(G_EXTRACT_VECTOR_ELT $root, $src, $idx)),
(apply (GIReplaceReg $root, $value))>
def extract_vector_element_not_const : GICombineRule<
(defs root:$root),
(match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx),
(G_EXTRACT_VECTOR_ELT $root, $src, $idx)),
(apply (GIReplaceReg $root, $value))>;
def extract_vector_element_different_indices : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx2),
(G_EXTRACT_VECTOR_ELT $root, $src, $idx1),
[{ return Helper.matchExtractVectorElementWithDifferentIndices(${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>;
Which is failing in my local test because the indices weren't CSEd
%4:_(s64) = G_CONSTANT i64 0
%2:_(<vscale x 2 x s32>) = G_INSERT_VECTOR_ELT %3:_, %0:_(s32), %4:_(s64)
%5:_(s64) = G_CONSTANT i64 0
%6:_(s32) = G_EXTRACT_VECTOR_ELT %2:_(<vscale x 2 x s32>), %5:_(s64)
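A plausible way to make the match robust to that (a sketch, assuming the generic GlobalISel helper getIConstantVRegValWithLookThrough; Extract, Insert, and Helper are illustrative names for the matched instructions and the CombinerHelper) is to compare the index values rather than requiring the index vregs to be identical:

// Sketch: tolerate duplicate, un-CSE'd index constants by comparing
// their values instead of their virtual registers.
// G_EXTRACT_VECTOR_ELT operands: dst, src, idx.
// G_INSERT_VECTOR_ELT operands:  dst, src, elt, idx.
auto ExtIdx = getIConstantVRegValWithLookThrough(
    Extract->getOperand(2).getReg(), MRI);
auto InsIdx = getIConstantVRegValWithLookThrough(
    Insert->getOperand(3).getReg(), MRI);
if (ExtIdx && InsIdx && ExtIdx->Value == InsIdx->Value)
  // Same element: the extract reads back the value just inserted.
  Helper.replaceRegWith(MRI, Extract->getOperand(0).getReg(),
                        Insert->getOperand(2).getReg());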
You are really fast.
added requested tests.
// the MI is lowered, since renderVLOp needs to see the G_CONSTANT. It would
// be nice if the InstructionSelector selected these instructions without
// needing to call select on them explicitly.
if (OpcWasGSplatVector) {
How about adding earlySelect like AArch64 and putting all of the SPLAT_VECTOR code together in there instead of splitting it across preISelLower and this code?
I do have a prototype for this, but I am not sure that the additional complexity it adds to InstructionSelect is worth it compared to just calling select recursively. preISelLower is intended for slightly changing an instruction and then continuing selection for that changed instruction. Here, we probably want to create new generic instructions, recursively call select on them, and then return from the original select call instead of continuing selection. Introducing earlySelect() would be an option, but it exists primarily for overriding the imported SelectionDAG patterns. Is this necessary here? Does SelectionDAG even have imported patterns for G_SPLAT_VECTOR?
SelectionDAG does not have patterns for G_SPLAT_VECTOR. It only has patterns for G_VMV_V_X_VL and G_VFMV_V_F_VL. As noted earlier, SelectionDAG has a pre-isel peephole to change the opcode. This is done because VP intrinsics directly use the G_VMV_V_X_VL and G_VFMV_V_F_VL opcodes, since they come with a VL. Generic shufflevector does not come with a VL, so it uses SPLAT_VECTOR. We standardize on one opcode using the peephole to reduce the isel table size. There are something like 14 different instructions that need to be selected based on the opcode and the type. Since those are already in the isel table, we would like to use that and not repeat the 14 opcodes manually. I'm not even sure doing the switch from G_SPLAT_VECTOR to G_VMV_V_X_VL and G_VFMV_V_F_VL makes sense. In SelectionDAG, we do the switch before any instructions are selected. We have patterns to fold vector arithmetic with a splat operand into special instructions too. Those patterns expect G_VMV_V_X_VL/G_VFMV_V_F_VL. If we don't do the switch until the G_SPLAT_VECTOR is eligible for selection, these arithmetic patterns won't match. I believe those patterns use a ComplexPattern to match the operand, so maybe we can also check SPLAT_VECTOR there for GISel and this won't really be a problem.
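For illustration, such a shared check could look roughly like the following GISel complex-pattern function (a sketch; the name selectVSplat and the exact operand positions are assumptions, not code from this patch):

InstructionSelector::ComplexRendererFns
RISCVInstructionSelector::selectVSplat(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *Def = MRI->getVRegDef(Root.getReg());
  Register Scalar;
  if (Def->getOpcode() == RISCV::G_VMV_V_X_VL)
    Scalar = Def->getOperand(2).getReg(); // dst, passthru, scalar, vl
  else if (Def->getOpcode() == TargetOpcode::G_SPLAT_VECTOR)
    Scalar = Def->getOperand(1).getReg(); // dst, scalar
  else
    return std::nullopt;
  // Render the splatted scalar as the instruction operand.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addReg(Scalar); }}};
}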
Then we don't need to put this into earlySelect(). Can we just add the following (pseudo code) to the big selection switch?
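Roughly like this, say (a sketch reusing the rewrite that currently lives in preISelLower; illustrative, not the comment's exact pseudo code):

case TargetOpcode::G_SPLAT_VECTOR: {
  LLT VecTy = MRI->getType(MI.getOperand(0).getReg());
  Register Scalar = MI.getOperand(1).getReg();
  bool IsGPRSplat = isRegInGprb(Scalar);
  // Rewrite in place to the target-specific generic opcode that the
  // imported SelectionDAG patterns know how to select.
  MI.setDesc(TII.get(IsGPRSplat ? RISCV::G_VMV_V_X_VL
                                : RISCV::G_VFMV_V_F_VL));
  MI.removeOperand(1);
  auto Passthru = MIB.buildUndef(VecTy);
  auto VLMax = MIB.buildConstant(LLT::scalar(STI.getXLen()), -1);
  MRI->setRegBank(Passthru.getReg(0), RBI.getRegBank(RISCV::VRBRegBankID));
  MRI->setRegBank(VLMax.getReg(0), RBI.getRegBank(RISCV::GPRBRegBankID));
  MachineInstrBuilder(*MI.getMF(), &MI)
      .addUse(Passthru.getReg(0))
      .addUse(Scalar)
      .addUse(VLMax.getReg(0));
  // Select the rewritten G_VMV/G_VFMV first, so renderVLOp can still
  // see the VL G_CONSTANT, then select the operands created above.
  if (!selectImpl(MI, *CoverageInfo))
    return false;
  return select(*Passthru.getInstr()) && select(*VLMax.getInstr());
}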
In this way we select G_VMV before the constant and G_VMV is selected by the imported SelectionDAG patterns. No preISelLower hack necessary.
In my opinion matching target-specific opcodes is a hack, and we should want to fix this for GISel by matching the generic opcode. No idea if the importer can handle this though.
I don't follow. SelectionDAG is turning ISD::SPLAT_VECTOR into RISCVISD::VMV_V_X_VL or RISCVISD::VFMV_V_F_VL. We're never matching selected opcodes.
I'm talking about this:
What I meant is that I think we should prefer matching completely generic opcodes like SPLAT_VECTOR in the arithmetic patterns (like you suggested as a possible solution) rather than matching the generic, but target-specific, opcodes like G_VMV. Sorry for not phrasing this clearly.
I think there will still be cases where we need to match a target-specific opcode that carries the VL operand. Maybe we can use a GISel equivalent of ISD::EXPERIMENTAL_VP_SPLAT these days. We didn't originally have the vp.splat intrinsic or a target-independent ISD opcode in SelectionDAG.
Thanks for the explanation! If I correctly remember a short discussion from the EuroLLVM GISel round-table, the goal is to introduce generic opcodes for all generic LLVM intrinsics instead of using G_INTRINSIC for them. So I guess we will eventually get a G_EXPERIMENTAL_VP_SPLAT anyway?
I have updated this patch, with some code from @topperc to fix the CSE issue.
I reserve the right to ask stupid questions. Why are you building G_X instructions in the RISC-V instruction selector? Why bother with target-independent instructions in the RISC-V selector? You know how to select a G_CONSTANT. Instead of building a G_CONSTANT, can't you just invoke, let's say, selectRISCVIConstant()? It should be an anti-pattern to build G_X instructions in selectors. You run into legality issues again.
We want the constant to be folded as an operand of the final instruction. It needs to be visible to a complex render function that is called by the isel table. selectRISCVIConstant creates the constant in a separate register.
So the issue is vector constants look different in the DAG and GISel? Could we have the pattern importer start to recognize the G_SPLAT_VECTOR case, instead of the G_BUILD_VECTOR of constants?
I don't think we're talking about vector constants here. The code calls buildConstant(sXLen, -1) to create an all-ones constant for the number of vector elements to process, where all ones means all elements. SelectionDAG has a pre-isel peephole that turns SPLAT_VECTOR into RISCVISD::VMV_V_X_VL with an all-ones vector element count to reduce the number of entries in the DAG isel table. This code is trying to emulate that to keep using the SelectionDAG patterns.
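For reference, a minimal sketch of how a custom renderer can fold that constant into the selected pseudo (assuming the RISCV::VLMaxSentinel encoding; not the verbatim upstream implementation):

void RISCVInstructionSelector::renderVLOp(MachineInstrBuilder &MIB,
                                          const MachineInstr &MI,
                                          int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
  int64_t CVal = MI.getOperand(1).getCImm()->getSExtValue();
  // An all-ones (-1) VL means "process all elements"; map it to the
  // sentinel immediate the VL pseudos expect.
  MIB.addImm(CVal == -1 ? RISCV::VLMaxSentinel : CVal);
}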
Why must
So you trust us to build a RISCV::IConstant with legal operands, but you don't trust us to know what G_CONSTANT is legal? We still need to support G_CONSTANT too for future patches that will create G_VMV_V_X_VL/G_VFMV_V_F_VL from intrinsics.
Sure, I trust you. But you build the G_CONSTANT just for:
I didn't notice what render function you pasted there. That's unrelated to this patch. It's used by our scalar ADDIW/SRLIW/SRAIW/SLLIW to hack around some weird type inference issues. GISel has i32 as a legal type for RV64; SelectionDAG doesn't. See #68470. The one used by this patch is renderVLOp.
Where did the discussion on this end up? Should we take this patch so we get selection working and revisit the approach / discuss in the future?
I'm starting to think we should add G_SPLAT_VECTOR patterns to RISCVGISel.td and revisit in the future if the isel table gets too large.
I don't plan to continue working on this, and it sounds like the preferred approach is to add patterns for G_SPLAT_VECTOR, which is different from the approach taken here. As a result, I am going to close this PR.
SelectionDAG lowers ISD::SPLAT_VECTOR in a pre-instruction-select step too.
By taking this approach, we allow the generic combiner to operate on G_SPLAT_VECTOR instead of RISCV Generic Opcodes.
Co-authored-by: Craig Topper [email protected]