[RISCV] Support VP_SPLAT mask operations #132345
Conversation
@llvm/pr-subscribers-backend-risc-v

Author: MingYan (NexMing)

Changes: (vp.splat val, mask, vl) -> (riscv_vmand_vl (select val, (riscv_vmset_vl vl), (riscv_vmclr_vl vl)), mask, vl)

Full diff: https://github.com/llvm/llvm-project/pull/132345.diff

2 Files Affected:
- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
- llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 48d8fc23dc1bb..4ba9fb1eb5832 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -810,6 +810,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLAT, VT, Custom);
setOperationPromotedToType(
ISD::VECTOR_SPLICE, VT,
@@ -12773,8 +12774,18 @@ SDValue RISCVTargetLowering::lowerVPSplatExperimental(SDValue Op,
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
- SDValue Result =
- lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
+ SDValue Result;
+ if (VT.getScalarType() == MVT::i1) {
+ SDValue TrueV = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+ SDValue FalseV = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
+ Result = DAG.getSelect(DL, VT, Val, TrueV, FalseV);
+ if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+ Result =
+ DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Result, Mask, VL);
+ } else {
+ Result =
+ lowerScalarSplat(SDValue(), Val, VL, ContainerVT, DL, DAG, Subtarget);
+ }
if (!VT.isFixedLengthVector())
return Result;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
new file mode 100644
index 0000000000000..ba9eb9e9a1487
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-splat-mask.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define <vscale x 1 x i1> @vp_splat_nxv1i1_true_unmasked(i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i1_true_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmset.m v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 true, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x i1> %splat
+}
+
+define <vscale x 1 x i1> @vp_splat_nxv1i1_false_unmasked(i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i1_false_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e8, mf8, ta, ma
+; CHECK-NEXT: vmclr.m v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 false, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x i1> %splat
+}
+
+define <vscale x 1 x i1> @vp_splat_nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv1i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT: vmclr.m v8
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmset.m v9
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v10, v10, 0
+; CHECK-NEXT: vmandn.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v9, v10
+; CHECK-NEXT: vmor.mm v8, v9, v8
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 1 x i1> @llvm.experimental.vp.splat.nxv1i1(i1 %val, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i1> %splat
+}
+
+define <vscale x 2 x i1> @vp_splat_nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv2i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT: vmclr.m v8
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmset.m v9
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v10, v10, 0
+; CHECK-NEXT: vmandn.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v9, v10
+; CHECK-NEXT: vmor.mm v8, v9, v8
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 2 x i1> @llvm.experimental.vp.splat.nxv2i1(i1 %val, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x i1> %splat
+}
+
+define <vscale x 4 x i1> @vp_splat_nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv4i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT: vmclr.m v8
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmset.m v9
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v10, v10, 0
+; CHECK-NEXT: vmandn.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v9, v10
+; CHECK-NEXT: vmor.mm v8, v9, v8
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 4 x i1> @llvm.experimental.vp.splat.nxv4i1(i1 %val, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x i1> %splat
+}
+
+define <vscale x 8 x i1> @vp_splat_nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv8i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT: vmclr.m v8
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmset.m v9
+; CHECK-NEXT: vmv.v.x v10, a0
+; CHECK-NEXT: vmsne.vi v10, v10, 0
+; CHECK-NEXT: vmandn.mm v8, v8, v10
+; CHECK-NEXT: vmand.mm v9, v9, v10
+; CHECK-NEXT: vmor.mm v8, v9, v8
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 8 x i1> @llvm.experimental.vp.splat.nxv8i1(i1 %val, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x i1> %splat
+}
+
+define <vscale x 16 x i1> @vp_splat_nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv16i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vmclr.m v10
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmsne.vi v11, v8, 0
+; CHECK-NEXT: vmset.m v8
+; CHECK-NEXT: vmandn.mm v9, v10, v11
+; CHECK-NEXT: vmand.mm v8, v8, v11
+; CHECK-NEXT: vmor.mm v8, v8, v9
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 16 x i1> @llvm.experimental.vp.splat.nxv16i1(i1 %val, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x i1> %splat
+}
+
+define <vscale x 32 x i1> @vp_splat_nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT: vmclr.m v12
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmsne.vi v13, v8, 0
+; CHECK-NEXT: vmset.m v8
+; CHECK-NEXT: vmandn.mm v9, v12, v13
+; CHECK-NEXT: vmand.mm v8, v8, v13
+; CHECK-NEXT: vmor.mm v8, v8, v9
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 32 x i1> @llvm.experimental.vp.splat.nxv32i1(i1 %val, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x i1> %splat
+}
+
+define <vscale x 64 x i1> @vp_splat_nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_splat_nxv64i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT: vmclr.m v16
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmsne.vi v17, v8, 0
+; CHECK-NEXT: vmset.m v8
+; CHECK-NEXT: vmandn.mm v9, v16, v17
+; CHECK-NEXT: vmand.mm v8, v8, v17
+; CHECK-NEXT: vmor.mm v8, v8, v9
+; CHECK-NEXT: vmand.mm v0, v8, v0
+; CHECK-NEXT: ret
+ %splat = call <vscale x 64 x i1> @llvm.experimental.vp.splat.nxv64i1(i1 %val, <vscale x 64 x i1> %m, i32 %evl)
+ ret <vscale x 64 x i1> %splat
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
✅ With the latest revision this PR passed the C/C++ code formatter.
LGTM.
Support VP_SPLAT mask operations by converting them to select operations.
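In generic IR terms (an illustrative sketch, not code from the patch; the function name is hypothetical), splatting an i1 value into a mask vector amounts to selecting between an all-ones and an all-zeros mask:

define <vscale x 4 x i1> @splat_i1_as_select(i1 %val) {
  ; Pick the all-true mask when %val is set, the all-false mask otherwise.
  %splat = select i1 %val, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i1> zeroinitializer
  ret <vscale x 4 x i1> %splat
}

The target-specific VMSET_VL/VMCLR_VL nodes express the same two constants while also carrying the vl operand, and the result is ANDed with the mask when the mask is not known to be all-ones.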
LGTM
I don't seem to have merge permissions. Can you help merge it?
When val is a constant, it lowers to:
(vp.splat val, mask, vl) -> (select val, (riscv_vmset_vl vl), (riscv_vmclr_vl vl))
Otherwise:
(vp.splat val, mask, vl) -> (vmsne_vl (vmv_v_x_vl (zext val), vl), splat(zero), mask, vl)
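For illustration (an editor's sketch using the generic llvm.vp.icmp intrinsic, not code from the patch; the function name is hypothetical), the non-constant case corresponds roughly to zero-extending the scalar, broadcasting it, and comparing it not-equal to zero under the mask, which is the andi/vmv.v.x/vmsne.vi sequence visible in the tests above:

declare <vscale x 1 x i1> @llvm.vp.icmp.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, metadata, <vscale x 1 x i1>, i32)

define <vscale x 1 x i1> @vp_splat_i1_sketch(i1 %val, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  ; Zero-extend the scalar ("andi a0, a0, 1" in the generated code).
  %ext = zext i1 %val to i8
  ; Broadcast it into a vector ("vmv.v.x").
  %head = insertelement <vscale x 1 x i8> poison, i8 %ext, i32 0
  %bcast = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
  ; Compare not-equal to zero under the mask ("vmsne.vi", then "vmand.mm").
  %res = call <vscale x 1 x i1> @llvm.vp.icmp.nxv1i8(<vscale x 1 x i8> %bcast, <vscale x 1 x i8> zeroinitializer, metadata !"ne", <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i1> %res
}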