[AArch64][GISel] Translate legal SVE formal arguments and select COPY for SVE #95236
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-aarch64

Author: Him188 (Him188)

Changes

This patch adds support for legal SVE formal arguments in IRTranslator, and support for COPY with SVE.

SVE arguments are allowed only if the hidden option -aarch64-enable-gisel-sve is enabled. Illegal types and predicates like nxv8i1 are not supported yet.

Patch is 27.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95236.diff

6 Files Affected:
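For context, the new path is strictly opt-in: it is only taken when the hidden flag is passed to llc together with GlobalISel. A minimal invocation, mirroring the RUN lines of the tests added below (the input file name is a placeholder):

llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel \
    -global-isel-abort=1 -aarch64-enable-gisel-sve=1 input.ll -o -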
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 360a841bdade4..aae6bfa565648 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -149,7 +149,7 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
// scalable vector types for all instruction, even if SVE is not yet supported
// with some instructions.
// See [AArch64TargetLowering::fallbackToDAGISel] for implementation details.
-static cl::opt<bool> EnableSVEGISel(
+cl::opt<bool> EnableSVEGISel(
"aarch64-enable-gisel-sve", cl::Hidden,
cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
cl::init(false));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 270474f80767a..9cb70c826b385 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -53,6 +53,8 @@
using namespace llvm;
using namespace AArch64GISelUtils;
+extern cl::opt<bool> EnableSVEGISel;
+
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {}
@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
auto &F = MF.getFunction();
- if (F.getReturnType()->isScalableTy() ||
+ if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
llvm::any_of(F.args(), [](const Argument &A) {
return A.getType()->isScalableTy();
- }))
+ })))
return true;
const auto &ST = MF.getSubtarget<AArch64Subtarget>();
if (!ST.hasNEON() || !ST.hasFPARMv8()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4a7c82b393c10..a23a31df1356c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
-getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
+getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
bool GetAllRegSet = false) {
+ if (SizeInBits.isScalable()) {
+ assert(RB.getID() == AArch64::FPRRegBankID
+ && "Expected FPR regbank for scalable type size");
+ return &AArch64::ZPRRegClass;
+ }
+
unsigned RegBankID = RB.getID();
if (RegBankID == AArch64::GPRRegBankID) {
@@ -939,8 +945,9 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
Register SrcReg = I.getOperand(1).getReg();
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
- unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
- unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+ TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+ TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
// Special casing for cross-bank copies of s1s. We can technically represent
// a 1-bit value with any size of register. The minimum size for a GPR is 32
@@ -951,7 +958,7 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
// register bank. Or make a new helper that carries along some constraint
// information.
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
- SrcSize = DstSize = 32;
+ SrcSize = DstSize = TypeSize::getFixed(32);
return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
getMinClassForRegBank(DstRegBank, DstSize, true)};
@@ -1016,8 +1023,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
return false;
}
- unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
- unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+ const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
+ const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
unsigned SubReg;
// If the source bank doesn't support a subregister copy small enough,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..c8fa242992beb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -258,6 +258,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case AArch64::QQQRegClassID:
case AArch64::QQQQRegClassID:
case AArch64::ZPRRegClassID:
+ case AArch64::ZPR_3bRegClassID:
return getRegBank(AArch64::FPRRegBankID);
case AArch64::GPR32commonRegClassID:
case AArch64::GPR32RegClassID:
@@ -700,10 +701,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// If both RB are null that means both registers are generic.
// We shouldn't be here.
assert(DstRB && SrcRB && "Both RegBank were nullptr");
- unsigned Size = getSizeInBits(DstReg, MRI, TRI);
+ TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
return getInstructionMapping(
- DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
- getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
+ DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
+ getCopyMapping(DstRB->getID(), SrcRB->getID(), Size.getKnownMinValue()),
// We only care about the mapping of the destination.
/*NumOperands*/ 1);
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll
new file mode 100644
index 0000000000000..32559f0898ff5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s
+;; vscale x 128-bit
+
+define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: ret
+ store <vscale x 16 x i8> %0, ptr %p, align 16
+ ret void
+}
+
+define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: ret
+ store <vscale x 8 x i16> %0, ptr %p, align 16
+ ret void
+}
+
+define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv4i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: ret
+ store <vscale x 4 x i32> %0, ptr %p, align 16
+ ret void
+}
+
+define void @formal_argument_nxv2i64(<vscale x 2 x i64> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv2i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: ret
+ store <vscale x 2 x i64> %0, ptr %p, align 16
+ ret void
+}
+
+;; TODO: Add tests for other types when store is supported for them.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll
new file mode 100644
index 0000000000000..ec89da824779a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \
+; RUN: -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
+
+;; vscale x 128-bit
+
+define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv16i8
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8i16
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv4i32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv2i64(<vscale x 2 x i64> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv2i64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv4f32(<vscale x 4 x float> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv4f32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv2f64(<vscale x 2 x double> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv2f64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv2p0(<vscale x 2 x ptr> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv2p0
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z0
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+;; vscale x 256-bit
+
+define void @formal_argument_nxv32i8(<vscale x 32 x i8> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv32i8
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv16i16(<vscale x 16 x i16> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv16i16
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8i32(<vscale x 8 x i32> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8i32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv4i64(<vscale x 4 x i64> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv4i64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8f32(<vscale x 8 x float> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8f32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv4f64(<vscale x 4 x double> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv4f64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv4p0(<vscale x 4 x ptr> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv4p0
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+;; vscale x 512-bit
+
+define void @formal_argument_nxv64i8(<vscale x 64 x i8> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv64i8
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>), [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv32i16(<vscale x 32 x i16> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv32i16
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv16i32(<vscale x 16 x i32> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv16i32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8i64(<vscale x 8 x i64> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8i64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv16f32(<vscale x 16 x float> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv16f32
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8f64(<vscale x 8 x double> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8f64
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+define void @formal_argument_nxv8p0(<vscale x 8 x ptr> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv8p0
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+ ; CHECK-NEXT: RET_ReallyLR
+ ret void
+}
+
+;; vscale x 1024-bit
+
+define void @formal_argument_nxv128i8(<vscale x 128 x i8> %0) {
+ ; CHECK-LABEL: name: formal_argument_nxv128i8
+ ; CHECK: bb.1 (%ir-block.1):
+ ; CHECK-NEXT: liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z5
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7
+ ...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
@@ -53,6 +53,8 @@
 using namespace llvm;
 using namespace AArch64GISelUtils;

+extern cl::opt<bool> EnableSVEGISel;
This shouldn't need to spread. I don't like having fallBackToDAGISel at all, but I see we have 2 of them. Why not just put them both in the same place?
AArch64CallLowering::fallBackToDAGISel works on the MachineFunction, as an early check before visiting each instruction. AArch64TargetLowering::fallBackToDAGISel works on each Instruction. FastISel also uses AArch64TargetLowering::fallBackToDAGISel, making it more complex to merge the logic.

This patch does not completely support SVE formal arguments, especially predicate registers like <vscale x 16 x i1>. The option is needed until we support them.
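For orientation, a sketch of the two hooks being discussed, with signatures as described in this thread (the comments are our paraphrase, not taken from the source):

// GlobalISel call lowering: a single early, whole-function bail-out.
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const;

// TargetLowering hook: queried per IR instruction; also consulted by FastISel.
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const;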
; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \
; RUN:   -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s

;; vscale x 128-bit
Could you add tests with several legal arguments to show the correct usage of the z registers?
Isn't sve-formal-argument.ll doing this?
Yeah, maybe. But that one is an assembly test.
It is always one argument.
Added more tests
Does this pass all regression tests? I had the same patch locally but I was seeing some failures. Maybe my patch had some issues...
@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
 /// Given a register bank, and size in bits, return the smallest register class
 /// that can represent that combination.
 static const TargetRegisterClass *
-getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
+getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
The variable name "SizeInBits" is not appropriate for the type TypeSize. Please use something else.
I think it's fine, as it's obtained from TypeSize RegisterBankInfo::getSizeInBits().

There are both TypeSize LLT::getSizeInBytes() and TypeSize LLT::getSizeInBits(), so TypeSize itself does not have a unit. It would be better if we specified the unit in the function parameter.
@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s
Why with -O0?
@madhur13490 Yes.
@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,

 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
   auto &F = MF.getFunction();
-  if (F.getReturnType()->isScalableTy() ||
+  if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
Are scalable return types enabled now?
Return is not supported yet. I'm following the same idea from #92130 (comment) to enable SVE only with a debug option.
I'm not 100% sure about all the RegBankInfo stuff, but we can change that if we need to later. From what I can tell this LGTM if the tests pass with a rebase.
Force-pushed from 14736f5 to fcdba2e.
Force-pushed from fcdba2e to f93104a.
The CI seems to be a bit flaky lately. Feel free to commit if the failures look unrelated and nothing fails locally.