[RISCV][GISEL] Legalize G_EXTRACT_SUBVECTOR #109426

Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-risc-v

Author: Michael Maitland (michaelmaitland)

Changes

This is heavily based on the SelectionDAG lowerEXTRACT_SUBVECTOR code.

Patch is 27.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109426.diff

5 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 132b7ec9aeef7c..d9f3f4ab3935d3 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -800,6 +800,17 @@ class GInsertVectorElement : public GenericMachineInstr {
}
};
+/// Represents an extract subvector.
+class GExtractSubvector : public GenericMachineInstr {
+public:
+ Register getSrcVec() const { return getOperand(1).getReg(); }
+ uint64_t getIndexImm() const { return getOperand(2).getImm(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_EXTRACT_SUBVECTOR;
+ }
+};
+
/// Represents a freeze.
class GFreeze : public GenericMachineInstr {
public:
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 055193bcc2c8db..db53bc409392bd 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -597,6 +597,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
SplatActions.clampScalar(1, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
+ .customIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
+ .customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
+
getLegacyLegalizerInfo().computeTables();
}
@@ -931,6 +935,133 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
return true;
}
+static LLT getLMUL1Ty(LLT VecTy) {
+ assert(VecTy.getElementType().getSizeInBits() <= 64 &&
+ "Unexpected vector LLT");
+ return LLT::scalable_vector(RISCV::RVVBitsPerBlock /
+ VecTy.getElementType().getSizeInBits(),
+ VecTy.getElementType());
+}
+
+bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
+ MachineIRBuilder &MIB) const {
+ GExtractSubvector &ES = cast<GExtractSubvector>(MI);
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+
+ Register Dst = ES.getReg(0);
+ Register Src = ES.getSrcVec();
+ uint64_t Idx = ES.getIndexImm();
+
+ // Only support vectors using custom legalization. We know the DstTy is a
+ // vector since we used that to decide whether to custom legalize or not.
+ LLT BigTy = MRI.getType(Src);
+ if (BigTy.isScalar())
+ return false;
+
+ LLT LitTy = MRI.getType(Dst);
+ Register Vec = Src;
+
+ // We don't have the ability to slide mask vectors down indexed by their i1
+ // elements; the smallest we can do is i8. Often we are able to bitcast to
+ // equivalent i8 vectors.
+ if (LitTy.getElementType() == LLT::scalar(1) && Idx != 0) {
+ auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
+ auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
+ assert(Idx % 8 == 0 && "Invalid index");
+ assert(BigTyMinElts % 8 == 0 && LitTyMinElts % 8 == 0 &&
+ "Unexpected mask vector lowering");
+ Idx /= 8;
+ BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
+ LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
+ Vec = MIB.buildBitcast(BigTy, Vec).getReg(0);
+ } else {
+ // We can't slide this mask vector down indexed by its i1 elements.
+ // This poses a problem when we wish to extract a scalable vector which
+ // can't be re-expressed as a larger type. Just choose the slow path and
+ // extend to a larger type, then truncate back down.
+ LLT ExtBigTy = BigTy.changeElementType(LLT::scalar(8));
+ LLT ExtLitTy = LitTy.changeElementType(LLT::scalar(8));
+ auto BigZExt = MIB.buildZExt(ExtBigTy, Vec);
+ auto ExtractZExt = MIB.buildExtractSubvector(ExtLitTy, BigZExt, Idx);
+ auto SplatZero = MIB.buildSplatVector(
+ ExtLitTy, MIB.buildConstant(ExtLitTy.getElementType(), 0));
+ MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, ExtractZExt, SplatZero);
+ MI.eraseFromParent();
+ return true;
+ }
+ }
+
+ // With an index of 0 this is a cast-like subvector, which can be performed
+ // with subregister operations.
+ if (Idx == 0)
+ return true;
+
+ // extract_subvector scales the index by vscale if the subvector is scalable,
+ // and decomposeSubvectorInsertExtractToSubRegs takes this into account.
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ MVT LitTyMVT = getMVTForLLT(LitTy);
+ unsigned SubRegIdx;
+ ElementCount RemIdx;
+ auto Decompose =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ getMVTForLLT(BigTy), LitTyMVT, Idx, TRI);
+ SubRegIdx = Decompose.first;
+ RemIdx = ElementCount::getScalable(Decompose.second);
+
+ // If the Idx has been completely eliminated then this is a subvector extract
+ // which naturally aligns to a vector register. These can easily be handled
+ // using subregister manipulation.
+ // TODO: add tests
+ if (RemIdx.isZero())
+ return true;
+
+ // Else LitTy is M1 or smaller and may need to be slid down: if LitTy
+ // was > M1 then the index would need to be a multiple of VLMAX, and so would
+ // divide exactly.
+ assert(
+ RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(LitTyMVT)).second ||
+ RISCVTargetLowering::getLMUL(LitTyMVT) == RISCVII::VLMUL::LMUL_1);
+
+ // If the vector type is an LMUL-group type, extract a subvector equal to the
+ // nearest full vector register type.
+ LLT InterLitTy = BigTy;
+ if (TypeSize::isKnownGT(BigTy.getSizeInBits(),
+ getLMUL1Ty(BigTy).getSizeInBits())) {
+ // If BigTy has an LMUL > 1, then LitTy should have a smaller LMUL, and
+ // we should have successfully decomposed the extract into a subregister.
+ assert(SubRegIdx != RISCV::NoSubRegister);
+ InterLitTy = getLMUL1Ty(BigTy);
+ // SDAG builds a TargetExtractSubreg. A Copy with SubReg specified on the
+ // source Register is the equivalent.
+ Vec = MIB.buildInstr(TargetOpcode::COPY, {InterLitTy}, {})
+ .addReg(Vec, 0, SubRegIdx)
+ .getReg(0);
+ }
+
+ // Slide this vector register down by the desired number of elements in order
+ // to place the desired subvector starting at element 0.
+ const LLT XLenTy(STI.getXLenVT());
+ auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx.getKnownMinValue());
+ auto [Mask, VL] = buildDefaultVLOps(LitTy, MIB, MRI);
+ uint64_t Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
+ auto Slidedown = MIB.buildInstr(
+ RISCV::G_VSLIDEDOWN_VL, {InterLitTy},
+ {MIB.buildUndef(InterLitTy), Vec, SlidedownAmt, Mask, VL, Policy});
+
+ // Now the vector is in the right position, extract our final subvector. This
+ // should resolve to a COPY.
+ auto Extract = MIB.buildExtractSubvector(LitTy, Slidedown, 0);
+
+ // We might have bitcast from a mask type: cast back to the original type if
+ // required.
+ MIB.buildBitcast(Dst, Extract);
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool RISCVLegalizerInfo::legalizeCustom(
LegalizerHelper &Helper, MachineInstr &MI,
LostDebugLocObserver &LocObserver) const {
@@ -1001,6 +1132,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
return legalizeExt(MI, MIRBuilder);
case TargetOpcode::G_SPLAT_VECTOR:
return legalizeSplatVector(MI, MIRBuilder);
+ case TargetOpcode::G_EXTRACT_SUBVECTOR:
+ return legalizeExtractSubvector(MI, MIRBuilder);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
return legalizeLoadStore(MI, Helper, MIRBuilder);
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index 2fc28615e7630d..d2afb175ae42bb 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -46,6 +46,7 @@ class RISCVLegalizerInfo : public LegalizerInfo {
bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const;
bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const;
+ bool legalizeExtractSubvector(MachineInstr &MI, MachineIRBuilder &MIB) const;
bool legalizeLoadStore(MachineInstr &MI, LegalizerHelper &Helper,
MachineIRBuilder &MIB) const;
};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
index ba40662c49c1df..b8641418aff747 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td
@@ -57,3 +57,13 @@ def G_SPLAT_VECTOR_SPLIT_I64_VL : RISCVGenericInstruction {
let InOperandList = (ins type0:$passthru, type1:$hi, type1:$lo, type2:$vl);
let hasSideEffects = false;
}
+
+// Pseudo equivalent to a RISCVISD::VSLIDEDOWN_VL
+def G_VSLIDEDOWN_VL : RISCVGenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$merge, type0:$vec, type1:$idx, type2:$mask,
+ type3:$vl, type4:$policy);
+ let hasSideEffects = false;
+}
+def : GINodeEquiv<G_VSLIDEDOWN_VL, riscv_slidedown_vl>;
+
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
new file mode 100644
index 00000000000000..78a2f82632d96a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-extract-subvector.mir
@@ -0,0 +1,339 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV32
+# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s --check-prefixes=CHECK,RV64
+
+# Special handling for i1-element vectors with non-zero index
+---
+name: extract_subvector_nxv4i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: extract_subvector_nxv4i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+ ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: extract_subvector_nxv4i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SELECT [[DEF]](<vscale x 4 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 4 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 4 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+ ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 4 x s8>), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 4 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 4 x s1>), 2
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: extract_subvector_nxv8i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: extract_subvector_nxv8i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[READ_VLENB:%[0-9]+]]:_(s32) = G_READ_VLENB
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
+ ; RV32-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[READ_VLENB]], [[C2]](s32)
+ ; RV32-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV32-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s32), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+ ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+ ; RV32-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[C3]](s32)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: extract_subvector_nxv8i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 8 x s8>) = G_SELECT [[DEF]](<vscale x 8 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[READ_VLENB:%[0-9]+]]:_(s64) = G_READ_VLENB
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; RV64-NEXT: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[READ_VLENB]], [[C2]](s64)
+ ; RV64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+ ; RV64-NEXT: [[DEF1:%[0-9]+]]:_(<vscale x 8 x s8>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[VSLIDEDOWN_VL:%[0-9]+]]:_(<vscale x 8 x s8>) = G_VSLIDEDOWN_VL [[DEF1]], [[SELECT]], [[LSHR]](s64), [[VMSET_VL]](<vscale x 2 x s1>), $x0, 3
+ ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 2 x s8>) = G_EXTRACT_SUBVECTOR [[VSLIDEDOWN_VL]](<vscale x 8 x s8>), 0
+ ; RV64-NEXT: [[BITCAST:%[0-9]+]]:_(<vscale x 2 x s8>) = G_BITCAST [[EXTRACT_SUBVECTOR]](<vscale x 2 x s8>)
+ ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 2 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 2 x s1>) = G_ICMP intpred(ne), [[BITCAST]](<vscale x 2 x s8>), [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 2 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 8 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 2 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 8 x s1>), 2
+ $v8 = COPY %1(<vscale x 2 x s1>)
+ PseudoRET implicit $v8
+...
+---
+name: extract_subvector_nxv64i1
+legalized: false
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ ; RV32-LABEL: name: extract_subvector_nxv64i1
+ ; RV32: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C]](s32)
+ ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[C1]](s32)
+ ; RV32-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV32-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[SELECT]](<vscale x 64 x s8>), 16
+ ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[C2]](s32)
+ ; RV32-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
+ ; RV32-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV32-NEXT: PseudoRET implicit $v8
+ ;
+ ; RV64-LABEL: name: extract_subvector_nxv64i1
+ ; RV64: [[DEF:%[0-9]+]]:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT]](s64)
+ ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SPLAT_VECTOR [[ANYEXT1]](s64)
+ ; RV64-NEXT: [[SELECT:%[0-9]+]]:_(<vscale x 64 x s8>) = G_SELECT [[DEF]](<vscale x 64 x s1>), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]]
+ ; RV64-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s8>) = G_EXTRACT_SUBVECTOR [[SELECT]](<vscale x 64 x s8>), 16
+ ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32)
+ ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_(<vscale x 4 x s8>) = G_SPLAT_VECTOR [[ANYEXT2]](s64)
+ ; RV64-NEXT: [[ICMP:%[0-9]+]]:_(<vscale x 4 x s1>) = G_ICMP intpred(ne), [[EXTRACT_SUBVECTOR]](<vscale x 4 x s8>), [[SPLAT_VECTOR2]]
+ ; RV64-NEXT: $v8 = COPY [[ICMP]](<vscale x 4 x s1>)
+ ; RV64-NEXT: PseudoRET implicit $v8
+ %0:_(<vscale x 64 x s1>) = G_IMPLICIT_DEF
+ %1:_(<vscale x 4 x s1>) = G_EXTRACT_SUBVECTOR %0(<vscale x 64 x s1>), 16
+ $v8 = COPY %1(<...
[truncated]
I retract my comment.
Force-pushed from dee17c6 to 6a07409.
    // SDAG builds a TargetExtractSubreg. A Copy with SubReg specified on the
    // source Register is the equivalent.
    Vec = MIB.buildInstr(TargetOpcode::COPY, {InterLitTy}, {})
              .addReg(Vec, 0, SubRegIdx)
It's not good to introduce a subregister index at this point. A subregister index requires a concrete register class (the verifier should probably enforce no generic register operands have subregister indexes)
The verifier does check this. *** Bad machine code: Generic virtual register does not allow subregister index ***
This is the sort of case that G_EXTRACT could be used for
We don't need to use G_EXTRACT. There's an index for G_EXTRACT_SUBVECTOR that should work here and will produce a "RemIdx" of 0 when it gets legalized. I believe it's Idx - RemIdx, except that RemIdx is scalable and Idx isn't, so that needs to be accounted for.
If we use G_EXTRACT then all G_EXTRACT_SUBVECTORs should be legalized to G_EXTRACT. Having two ways to represent the same thing is silly.
I'm looking into fixing this in SelectionDAG too.
Does RemIdx really need to be scalable? SelectionDAG uses getElementCount and passes RemIdx, which is scalable. But I think we can use buildVScale in GISel, which does not require us to make it an ElementCount.
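For reference, the patch above already does exactly this; the scalable remainder is materialized with buildVScale into a plain XLen-typed scalar, so the slide amount never has to be carried around as an ElementCount:

    // From legalizeExtractSubvector: build vscale * RemIdx.getKnownMinValue()
    // as an XLen scalar and feed it straight to the slidedown.
    const LLT XLenTy(STI.getXLenVT());
    auto SlidedownAmt = MIB.buildVScale(XLenTy, RemIdx.getKnownMinValue());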
ping
        .customIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
        .customIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST));
Would be nicer if these were fused into one customIf condition
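A sketch of the fused form, using the LegalityPredicates::any combinator that the updated patch also reaches for:

    getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
        .customIf(LegalityPredicates::any(
            typeIsLegalBoolVec(0, BoolVecTys, ST),
            typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)));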
      Idx /= 8;
      BigTy = LLT::vector(BigTy.getElementCount().divideCoefficientBy(8), 8);
      LitTy = LLT::vector(LitTy.getElementCount().divideCoefficientBy(8), 8);
      auto CastVec = MIB.buildBitcast(BigTy, Src);
Can this be expressed as a bitcast action, instead of custom?
What would be the point? MIB.buildBitcast(BigTy, Src); is exactly what we want to do. There are no other checks we'd like to be performed.
The point would be to move it to generic code.
Ah, I thought you just meant changing auto CastVec = MIB.buildBitcast(BigTy, Src) to Helper.bitcast(), but it seems like you mean this entire block.
Yes, there is already a bitcast lowering action to do this kind of coercion.
Got it, updated. Thanks for the suggestion.
    return Legalized;

    auto AdjustAmt = CastTy.getScalarType().getSizeInBits();
    if (DstTyMinElts < AdjustAmt || SrcTyMinElts < AdjustAmt)
Is this redundant with the DstTyMinElts % AdjustAmt != 0 and SrcTyMinElts % AdjustAmt != 0 checks?
In SelectionDAG:

    if (VecVT.getVectorMinNumElements() >= 8 &&
        SubVecVT.getVectorMinNumElements() >= 8) {
      assert(OrigIdx % 8 == 0 && "Invalid index");
      assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
             SubVecVT.getVectorMinNumElements() % 8 == 0 &&
             "Unexpected mask vector lowering");
      ...
    }

I was bringing this over. It's probably redundant in both cases?
Your AdjustAmt isn't correct. It only works if the original element type is i1. For any other type you need the ratio of the new and old element sizes.
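To make that concrete (a hypothetical sketch, not code from the patch; the SrcTy/CastTy names are assumed): the adjustment is the ratio of the new and old element widths, which only coincides with the new element width when the old element type is i1.

    // Hypothetical: the element-count adjustment is new-width / old-width.
    unsigned OldEltBits = SrcTy.getScalarType().getSizeInBits();  // 1 for i1
    unsigned NewEltBits = CastTy.getScalarType().getSizeInBits(); // e.g. 8
    unsigned AdjustAmt = NewEltBits / OldEltBits; // i1 -> i8: 8/1 = 8,
                                                  // but s16 -> s32: 32/16 = 2, not 32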
> In SelectionDAG:
>
>     if (VecVT.getVectorMinNumElements() >= 8 &&
>         SubVecVT.getVectorMinNumElements() >= 8) {
>       assert(OrigIdx % 8 == 0 && "Invalid index");
>       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
>              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
>              "Unexpected mask vector lowering");
>       ...
>     }
>
> I was bringing this over. It's probably redundant in both cases?
The SelectionDAG asserts are checking something more specific. Because that code runs after legalization, it should only receive a legal type. The only legal types with more than 8 elements have a multiple of 8 elements.
The GISel code doesn't get to deal with only legal types.
Removed.
Edit: just saw your comment. I will fix.
    if (LitTy.getElementType() == LLT::scalar(1)) {
      auto BigTyMinElts = BigTy.getElementCount().getKnownMinValue();
      auto LitTyMinElts = LitTy.getElementCount().getKnownMinValue();
      if (BigTyMinElts >= 8 && LitTyMinElts >= 8) {
Can this check be done with a bitcastIf on getActionDefinitionsBuilder?
There is no bitcastIf. I could add it and check that the element type is an s1 and BigTyMinElts >= 8 && LitTyMinElts >= 8, if we'd like.
AArch64 uses it:

    .bitcastIf(typeInSet(0, {v4s8}),
               [=](const LegalityQuery &Query) {
                 const LLT VecTy = Query.Types[0];
                 return std::pair(0, LLT::scalar(VecTy.getSizeInBits()));
               })
Oh thanks, I didn't see it in https://llvm.org/doxygen/classllvm_1_1LegalizerHelper.html. Updated this patch.
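The updated form might look something like this (a hypothetical sketch; ExtractSubvecBitcastPred is the predicate visible in the updated patch below, and the mutation maps an s1 vector to the equivalent s8 vector):

    getActionDefinitionsBuilder(G_EXTRACT_SUBVECTOR)
        .bitcastIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                       ExtractSubvecBitcastPred),
                   [=](const LegalityQuery &Query) {
                     // Coerce <vscale x N x s1> to <vscale x N/8 x s8>.
                     const LLT Ty = Query.Types[0];
                     return std::pair(
                         0,
                         LLT::vector(Ty.getElementCount().divideCoefficientBy(8), 8));
                   })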
            all(typeIsLegalBoolVec(0, BoolVecTys, ST), ExtractSubvecBitcastPred),
            /*Mutation=*/nullptr)
        .customIf(
            LegalityPredicates::any(typeIsLegalBoolVec(0, BoolVecTys, ST),
You can't just check type0. You have to check type1 too. If the type isn't legal, the operation needs to be split before you can do custom legalization.
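A sketch of the fix under that constraint (hypothetical; it applies the same legality predicates to type index 1, the source vector, as well):

    .customIf(all(
        LegalityPredicates::any(typeIsLegalBoolVec(0, BoolVecTys, ST),
                                typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST)),
        LegalityPredicates::any(typeIsLegalBoolVec(1, BoolVecTys, ST),
                                typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))))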
This still needs to be addressed for the customIf
Addressed.
LGTM