-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AArch64][GISel] Translate legal SVE formal arguments and select COPY for SVE #95236
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
44567f6
80ac5b8
659e601
f982bcd
4fb52e1
f93104a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,6 +53,8 @@ | |
using namespace llvm; | ||
using namespace AArch64GISelUtils; | ||
|
||
extern cl::opt<bool> EnableSVEGISel; | ||
|
||
AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI) | ||
: CallLowering(&TLI) {} | ||
|
||
|
@@ -525,10 +527,10 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder, | |
|
||
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { | ||
auto &F = MF.getFunction(); | ||
if (F.getReturnType()->isScalableTy() || | ||
llvm::any_of(F.args(), [](const Argument &A) { | ||
return A.getType()->isScalableTy(); | ||
})) | ||
if (!EnableSVEGISel && (F.getReturnType()->isScalableTy() || | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are scalable return types enabled now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Return is not supported yet. I'm following the same idea from #92130 (comment) to enable SVE only with a debug option |
||
llvm::any_of(F.args(), [](const Argument &A) { | ||
return A.getType()->isScalableTy(); | ||
}))) | ||
return true; | ||
const auto &ST = MF.getSubtarget<AArch64Subtarget>(); | ||
if (!ST.hasNEON() || !ST.hasFPARMv8()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -597,8 +597,14 @@ getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, | |
/// Given a register bank, and size in bits, return the smallest register class | ||
/// that can represent that combination. | ||
static const TargetRegisterClass * | ||
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, | ||
getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The variable name "SizeInBits" is not appropriate for Type "TypeSize". Please have something else. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's fine, as it's obtained from There are both |
||
bool GetAllRegSet = false) { | ||
if (SizeInBits.isScalable()) { | ||
assert(RB.getID() == AArch64::FPRRegBankID && | ||
"Expected FPR regbank for scalable type size"); | ||
return &AArch64::ZPRRegClass; | ||
} | ||
|
||
unsigned RegBankID = RB.getID(); | ||
|
||
if (RegBankID == AArch64::GPRRegBankID) { | ||
|
@@ -939,8 +945,9 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, | |
Register SrcReg = I.getOperand(1).getReg(); | ||
const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); | ||
const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); | ||
unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||
unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||
|
||
TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); | ||
TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); | ||
|
||
// Special casing for cross-bank copies of s1s. We can technically represent | ||
// a 1-bit value with any size of register. The minimum size for a GPR is 32 | ||
|
@@ -951,7 +958,7 @@ getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, | |
// register bank. Or make a new helper that carries along some constraint | ||
// information. | ||
if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) | ||
SrcSize = DstSize = 32; | ||
SrcSize = DstSize = TypeSize::getFixed(32); | ||
|
||
return {getMinClassForRegBank(SrcRegBank, SrcSize, true), | ||
getMinClassForRegBank(DstRegBank, DstSize, true)}; | ||
|
@@ -1016,8 +1023,8 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, | |
return false; | ||
} | ||
|
||
unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); | ||
unsigned DstSize = TRI.getRegSizeInBits(*DstRC); | ||
const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC); | ||
const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC); | ||
unsigned SubReg; | ||
|
||
// If the source bank doesn't support a subregister copy small enough, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | ||
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -global-isel-abort=1 -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s | ||
|
||
;; Test the correct usage of the Z registers with multiple SVE arguments. | ||
|
||
define void @formal_argument_nxv16i8_2(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, ptr %p) { | ||
; CHECK-LABEL: formal_argument_nxv16i8_2: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: ptrue p0.b | ||
; CHECK-NEXT: st1b { z0.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z1.b }, p0, [x0] | ||
; CHECK-NEXT: ret | ||
store <vscale x 16 x i8> %0, ptr %p, align 16 | ||
store <vscale x 16 x i8> %1, ptr %p, align 16 | ||
ret void | ||
} | ||
|
||
define void @formal_argument_nxv16i8_8( | ||
; CHECK-LABEL: formal_argument_nxv16i8_8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: ptrue p0.b | ||
; CHECK-NEXT: st1b { z0.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z1.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z2.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z3.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z4.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z5.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z6.b }, p0, [x0] | ||
; CHECK-NEXT: st1b { z7.b }, p0, [x0] | ||
; CHECK-NEXT: ret | ||
<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2, <vscale x 16 x i8> %3, | ||
<vscale x 16 x i8> %4, <vscale x 16 x i8> %5, <vscale x 16 x i8> %6, <vscale x 16 x i8> %7, | ||
ptr %p) { | ||
|
||
store <vscale x 16 x i8> %0, ptr %p, align 16 | ||
store <vscale x 16 x i8> %1, ptr %p, align 16 | ||
store <vscale x 16 x i8> %2, ptr %p, align 16 | ||
store <vscale x 16 x i8> %3, ptr %p, align 16 | ||
store <vscale x 16 x i8> %4, ptr %p, align 16 | ||
store <vscale x 16 x i8> %5, ptr %p, align 16 | ||
store <vscale x 16 x i8> %6, ptr %p, align 16 | ||
store <vscale x 16 x i8> %7, ptr %p, align 16 | ||
ret void | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This shouldn't need to spread. I don't like having fallBackToDAGISel at all, but I see we have 2 of them. Why not just put them both in the same place?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AArch64CallLowering::fallBackToDAGISel
works onMachineFunction
as an early path before visiting each instruction.AArch64TargetLowering::fallBackToDAGISel
works on eachInstruction
.FastISel also uses
AArch64TargetLowering::fallBackToDAGISel
, making it more complex to merge the logic.This patch does not completely support SVE formal arguments, especially for predicate registers <vscale x 16 x i1>.
The option is needed until we support them.