
[AArch64][GISel] Translate legal SVE formal arguments and select COPY for SVE #95236


Merged: 6 commits into llvm:main from Him188:tguan/gisel-sve-parameters, Jun 18, 2024

Conversation

@Him188 (Member) commented Jun 12, 2024

This patch adds support for legal SVE formal arguments in the IRTranslator, and for selecting COPY with SVE.

SVE arguments are allowed only if the hidden option -aarch64-enable-gisel-sve is enabled. Illegal types and predicates like nxv8i1 are not supported yet.
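
As a quick illustration (the function name here is made up, but the RUN line and the store pattern mirror the sve-formal-argument.ll test added below), a single legal SVE formal argument compiled through GlobalISel with the hidden flag enabled:

; Compile with:
;   llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel \
;       -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o -
define void @store_sve_arg(<vscale x 4 x i32> %v, ptr %p) {
  store <vscale x 4 x i32> %v, ptr %p, align 16
  ret void
}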

@llvmbot (Member) commented Jun 12, 2024

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-aarch64

Author: Him188 (Him188)

Changes

This patch adds support for legal SVE formal arguments in the IRTranslator, and for selecting COPY with SVE.

SVE arguments are allowed only if the hidden option -aarch64-enable-gisel-sve is enabled. Illegal types and predicates like nxv8i1 are not supported yet.


Patch is 27.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/95236.diff

6 Files Affected:

  • (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+1-1)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp (+4-2)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (+13-6)
  • (modified) llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp (+4-3)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll (+45)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll (+389)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 360a841bdade4..aae6bfa565648 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -149,7 +149,7 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
 // scalable vector types for all instruction, even if SVE is not yet supported
 // with some instructions.
 // See [AArch64TargetLowering::fallbackToDAGISel] for implementation details.
-static cl::opt<bool> EnableSVEGISel(
+cl::opt<bool> EnableSVEGISel(
     "aarch64-enable-gisel-sve", cl::Hidden,
     cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
     cl::init(false));
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 270474f80767a..9cb70c826b385 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -53,6 +53,8 @@
 using namespace llvm;
 using namespace AArch64GISelUtils;
 
+extern cl::opt<bool> EnableSVEGISel;
+
 AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
   : CallLowering(&TLI) {}
 
@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
 
 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
   auto &F = MF.getFunction();
-  if (F.getReturnType()->isScalableTy() ||
+  if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||
       llvm::any_of(F.args(), [](const Argument &A) {
         return A.getType()->isScalableTy();
-      }))
+      })))
     return true;
   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
   if (!ST.hasNEON() || !ST.hasFPARMv8()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 4a7c82b393c10..a23a31df1356c 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
 /// Given a register bank, and size in bits, return the smallest register class
 /// that can represent that combination.
 static const TargetRegisterClass *
-getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
+getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
                       bool GetAllRegSet = false) {
+  if (SizeInBits.isScalable()) {
+    assert(RB.getID() == AArch64::FPRRegBankID &&
+           "Expected FPR regbank for scalable type size");
+    return &AArch64::ZPRRegClass;
+  }
+
   unsigned RegBankID = RB.getID();
 
   if (RegBankID == AArch64::GPRRegBankID) {
@@ -939,8 +945,9 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
   Register SrcReg = I.getOperand(1).getReg();
   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
-  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
-  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+
+  TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
+  TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
 
   // Special casing for cross-bank copies of s1s. We can technically represent
   // a 1-bit value with any size of register. The minimum size for a GPR is 32
@@ -951,7 +958,7 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
   // register bank. Or make a new helper that carries along some constraint
   // information.
   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
-    SrcSize = DstSize = 32;
+    SrcSize = DstSize = TypeSize::getFixed(32);
 
   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
           getMinClassForRegBank(DstRegBank, DstSize, true)};
@@ -1016,8 +1023,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
       return false;
     }
 
-    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
-    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
+    const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
+    const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
     unsigned SubReg;
 
     // If the source bank doesn't support a subregister copy small enough,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index 4aa6999d1d3ca..c8fa242992beb 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -258,6 +258,7 @@ AArch64RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
   case AArch64::QQQRegClassID:
   case AArch64::QQQQRegClassID:
   case AArch64::ZPRRegClassID:
+  case AArch64::ZPR_3bRegClassID:
     return getRegBank(AArch64::FPRRegBankID);
   case AArch64::GPR32commonRegClassID:
   case AArch64::GPR32RegClassID:
@@ -700,10 +701,10 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       // If both RB are null that means both registers are generic.
       // We shouldn't be here.
       assert(DstRB && SrcRB && "Both RegBank were nullptr");
-      unsigned Size = getSizeInBits(DstReg, MRI, TRI);
+      TypeSize Size = getSizeInBits(DstReg, MRI, TRI);
       return getInstructionMapping(
-          DefaultMappingID, copyCost(*DstRB, *SrcRB, TypeSize::getFixed(Size)),
-          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size),
+          DefaultMappingID, copyCost(*DstRB, *SrcRB, Size),
+          getCopyMapping(DstRB->getID(), SrcRB->getID(), Size.getKnownMinValue()),
           // We only care about the mapping of the destination.
           /*NumOperands*/ 1);
     }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll
new file mode 100644
index 0000000000000..32559f0898ff5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/sve-formal-argument.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s
+;; vscale x 128-bit
+
+define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
+; CHECK-NEXT:    ret
+  store <vscale x 16 x i8> %0, ptr %p, align 16
+  ret void
+}
+
+define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
+; CHECK-NEXT:    ret
+  store <vscale x 8 x i16> %0, ptr %p, align 16
+  ret void
+}
+
+define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
+; CHECK-NEXT:    ret
+  store <vscale x 4 x i32> %0, ptr %p, align 16
+  ret void
+}
+
+define void @formal_argument_nxv2i64(<vscale x 2 x i64> %0, ptr %p) {
+; CHECK-LABEL: formal_argument_nxv2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
+; CHECK-NEXT:    ret
+  store <vscale x 2 x i64> %0, ptr %p, align 16
+  ret void
+}
+
+;; TODO: Add tests for other types when store is supported for them.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll
new file mode 100644
index 0000000000000..ec89da824779a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/translate-sve-formal-argument.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \
+; RUN:     -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s
+
+;; vscale x 128-bit
+
+define void @formal_argument_nxv16i8(<vscale x 16 x i8> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv16i8
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8i16(<vscale x 8 x i16> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8i16
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv4i32(<vscale x 4 x i32> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv4i32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv2i64(<vscale x 2 x i64> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv2i64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv4f32(<vscale x 4 x float> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv4f32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv2f64(<vscale x 2 x double> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv2f64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv2p0(<vscale x 2 x ptr> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv2p0
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x p0>) = COPY $z0
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+;; vscale x 256-bit
+
+define void @formal_argument_nxv32i8(<vscale x 32 x i8> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv32i8
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv16i16(<vscale x 16 x i16> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv16i16
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8i32(<vscale x 8 x i32> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8i32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv4i64(<vscale x 4 x i64> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv4i64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8f32(<vscale x 8 x float> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8f32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv4f64(<vscale x 4 x double> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv4f64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv4p0(<vscale x 4 x ptr> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv4p0
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 4 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+;; vscale x 512-bit
+
+define void @formal_argument_nxv64i8(<vscale x 64 x i8> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv64i8
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 64 x s8>) = G_CONCAT_VECTORS [[COPY]](<vscale x 16 x s8>), [[COPY1]](<vscale x 16 x s8>), [[COPY2]](<vscale x 16 x s8>), [[COPY3]](<vscale x 16 x s8>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv32i16(<vscale x 32 x i16> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv32i16
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 8 x s16>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 32 x s16>) = G_CONCAT_VECTORS [[COPY]](<vscale x 8 x s16>), [[COPY1]](<vscale x 8 x s16>), [[COPY2]](<vscale x 8 x s16>), [[COPY3]](<vscale x 8 x s16>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv16i32(<vscale x 16 x i32> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv16i32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8i64(<vscale x 8 x i64> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8i64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv16f32(<vscale x 16 x float> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv16f32
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 16 x s32>) = G_CONCAT_VECTORS [[COPY]](<vscale x 4 x s32>), [[COPY1]](<vscale x 4 x s32>), [[COPY2]](<vscale x 4 x s32>), [[COPY3]](<vscale x 4 x s32>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8f64(<vscale x 8 x double> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8f64
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x s64>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+define void @formal_argument_nxv8p0(<vscale x 8 x ptr> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv8p0
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 2 x s64>) = COPY $z3
+  ; CHECK-NEXT:   [[CONCAT_VECTORS:%[0-9]+]]:_(<vscale x 8 x p0>) = G_CONCAT_VECTORS [[COPY]](<vscale x 2 x s64>), [[COPY1]](<vscale x 2 x s64>), [[COPY2]](<vscale x 2 x s64>), [[COPY3]](<vscale x 2 x s64>)
+  ; CHECK-NEXT:   RET_ReallyLR
+  ret void
+}
+
+;; vscale x 1024-bit
+
+define void @formal_argument_nxv128i8(<vscale x 128 x i8> %0) {
+  ; CHECK-LABEL: name: formal_argument_nxv128i8
+  ; CHECK: bb.1 (%ir-block.1):
+  ; CHECK-NEXT:   liveins: $z0, $z1, $z2, $z3, $z4, $z5, $z6, $z7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(<vscale x 16 x s8>) = COPY $z7
+  ...
[truncated]

github-actions bot commented Jun 12, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@Him188 Him188 requested a review from madhur13490 June 12, 2024 12:41
@@ -53,6 +53,8 @@
using namespace llvm;
using namespace AArch64GISelUtils;

extern cl::opt<bool> EnableSVEGISel;
Contributor

This shouldn't need to spread. I don't like having fallBackToDAGISel at all, but I see we have 2 of them. Why not just put them both in the same place?

@Him188 (Member, Author) replied Jun 12, 2024

AArch64CallLowering::fallBackToDAGISel works on the MachineFunction, as an early check before visiting each instruction, while AArch64TargetLowering::fallbackToDAGISel works on each Instruction.

FastISel also uses AArch64TargetLowering::fallbackToDAGISel, which makes it more complex to merge the two.

This patch does not completely support SVE formal arguments; in particular, predicate registers such as <vscale x 16 x i1> are not handled. The option is needed until they are supported.
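
For illustration, a hypothetical function (not from this patch) that the option still guards: with -aarch64-enable-gisel-sve at its default of false, the scalable predicate argument keeps the whole function on the SelectionDAG path.

; Hypothetical example: predicate formal arguments are not handled by this
; patch, so functions like this stay on SelectionDAG via
; AArch64CallLowering::fallBackToDAGISel unless the hidden flag is set.
define void @predicate_arg(<vscale x 16 x i1> %pg) {
  ret void
}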

; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 \
; RUN: -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck %s

;; vscale x 128-bit


Could you add tests with several legal arguments to show the correct usage of the z registers?

@Him188 (Member, Author)

Isn't sve-formal-argument.ll doing this?


Yeah, maybe. But it checks the final assembler output.


It is always one argument.

@Him188 (Member, Author)

Added more tests

@madhur13490 (Contributor) left a comment

Does this pass all regression tests? I had the same patch locally, but I was seeing some failures. Maybe my patch had some issues...

@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits,
Contributor

The variable name "SizeInBits" is not appropriate for the type "TypeSize". Please pick something else.

@Him188 (Member, Author) replied Jun 13, 2024

I think it's fine, as the value is obtained from TypeSize RegisterBankInfo::getSizeInBits().

There are both TypeSize LLT::getSizeInBytes() and TypeSize LLT::getSizeInBits(), so TypeSize itself does not carry a unit. It is better to specify the unit in the parameter name.
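
A minimal standalone sketch of the TypeSize semantics under discussion (the llvm::TypeSize calls are real API; the values are illustrative only):

#include "llvm/Support/TypeSize.h"
#include <cassert>
using namespace llvm;

int main() {
  // TypeSize is a known-minimum value plus a scalable flag; it carries no
  // unit. The producing API (getSizeInBits vs getSizeInBytes) defines one.
  TypeSize Fixed = TypeSize::getFixed(32);        // exactly 32
  TypeSize Scalable = TypeSize::getScalable(128); // vscale x 128

  assert(!Fixed.isScalable());
  assert(Scalable.isScalable());
  assert(Scalable.getKnownMinValue() == 128);
  // Fixed and scalable sizes never compare equal, even with the same minimum.
  assert(TypeSize::getFixed(128) != TypeSize::getScalable(128));
  return 0;
}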

@@ -0,0 +1,45 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-linux-gnu -O0 -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s
Contributor

Why with -O0?

@Him188 (Member, Author) commented Jun 12, 2024

Does this pass all regression tests?

@madhur13490 Yes, check-all passed. The failing CI is due to a failing LoongArch test.

@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,

bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
auto &F = MF.getFunction();
if (F.getReturnType()->isScalableTy() ||
if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() ||


Are scalable return types enabled now?

@Him188 (Member, Author) replied Jun 13, 2024

Returns are not supported yet. I'm following the same idea as #92130 (comment): enabling SVE only behind a debug option.
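
To make the gating concrete, a hypothetical function with a scalable return type; under the updated check above it falls back to SelectionDAG whenever -aarch64-enable-gisel-sve is false (the default):

; Hypothetical example: the scalable return type trips
; F.getReturnType()->isScalableTy() in AArch64CallLowering::fallBackToDAGISel.
define <vscale x 4 x i32> @ret_nxv4i32(<vscale x 4 x i32> %v) {
  ret <vscale x 4 x i32> %v
}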

@Him188 Him188 requested review from tschuett, arsenm and davemgreen June 13, 2024 13:07
@davemgreen (Collaborator) left a comment

I'm not 100% sure about all the RegBankInfo stuff, but we can change that if we need to later. From what I can tell this LGTM if the tests pass with a rebase.

@Him188 Him188 force-pushed the tguan/gisel-sve-parameters branch 3 times, most recently from 14736f5 to fcdba2e Compare June 17, 2024 09:18
@Him188 Him188 force-pushed the tguan/gisel-sve-parameters branch from fcdba2e to f93104a Compare June 17, 2024 15:41
@davemgreen (Collaborator) commented

The CI seems to be a bit flaky lately. Feel free to commit if the failures look unrelated and nothing fails locally.

@Him188 Him188 merged commit 0e21f12 into llvm:main Jun 18, 2024
5 of 7 checks passed
@Him188 Him188 deleted the tguan/gisel-sve-parameters branch June 18, 2024 08:29