Skip to content

Commit 41427b0

Browse files
[AArch64] Disable FastISel/GlobalISel for ZT0 state (llvm#82768)
For __arm_new("zt0") we need to have special setup code in the prologue. For calls that don't preserve ZT0, we need to emit code to preserve ZT0 around the call. This is only emitted by SelectionDAG ISel at the moment.
1 parent 686ec7c commit 41427b0

File tree

4 files changed

+69
-5
lines changed

4 files changed

+69
-5
lines changed

llvm/lib/Target/AArch64/AArch64FastISel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5179,7 +5179,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
51795179
const TargetLibraryInfo *LibInfo) {
51805180

51815181
SMEAttrs CallerAttrs(*FuncInfo.Fn);
5182-
if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5182+
if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5183+
CallerAttrs.hasStreamingInterfaceOrBody() ||
51835184
CallerAttrs.hasStreamingCompatibleInterface())
51845185
return nullptr;
51855186
return new AArch64FastISel(FuncInfo, LibInfo);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25892,7 +25892,8 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
2589225892
auto CallerAttrs = SMEAttrs(*Inst.getFunction());
2589325893
auto CalleeAttrs = SMEAttrs(*Base);
2589425894
if (CallerAttrs.requiresSMChange(CalleeAttrs) ||
25895-
CallerAttrs.requiresLazySave(CalleeAttrs))
25895+
CallerAttrs.requiresLazySave(CalleeAttrs) ||
25896+
CallerAttrs.requiresPreservingZT0(CalleeAttrs))
2589625897
return true;
2589725898
}
2589825899
return false;

llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
535535
}
536536

537537
SMEAttrs Attrs(F);
538-
if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
538+
if (Attrs.hasZAState() || Attrs.hasZT0State() ||
539+
Attrs.hasStreamingInterfaceOrBody() ||
539540
Attrs.hasStreamingCompatibleInterface())
540541
return true;
541542

llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
2+
; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s \
33
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
4-
; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \
4+
; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s \
55
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL
66

77

@@ -447,3 +447,64 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
447447
%res = frem float %a, %b
448448
ret float %res
449449
}
450+
451+
;
452+
; Check ZT0 State
453+
;
454+
455+
declare double @zt0_shared_callee(double) "aarch64_inout_zt0"
456+
457+
define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" {
458+
; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee:
459+
; CHECK-COMMON: // %bb.0: // %prelude
460+
; CHECK-COMMON-NEXT: sub sp, sp, #80
461+
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
462+
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
463+
; CHECK-COMMON-NEXT: cbz x8, .LBB13_2
464+
; CHECK-COMMON-NEXT: b .LBB13_1
465+
; CHECK-COMMON-NEXT: .LBB13_1: // %save.za
466+
; CHECK-COMMON-NEXT: mov x8, sp
467+
; CHECK-COMMON-NEXT: str zt0, [x8]
468+
; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
469+
; CHECK-COMMON-NEXT: ldr zt0, [x8]
470+
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
471+
; CHECK-COMMON-NEXT: b .LBB13_2
472+
; CHECK-COMMON-NEXT: .LBB13_2: // %entry
473+
; CHECK-COMMON-NEXT: smstart za
474+
; CHECK-COMMON-NEXT: zero { zt0 }
475+
; CHECK-COMMON-NEXT: bl zt0_shared_callee
476+
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
477+
; CHECK-COMMON-NEXT: fmov d1, x8
478+
; CHECK-COMMON-NEXT: fadd d0, d0, d1
479+
; CHECK-COMMON-NEXT: smstop za
480+
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
481+
; CHECK-COMMON-NEXT: add sp, sp, #80
482+
; CHECK-COMMON-NEXT: ret
483+
entry:
484+
%call = call double @zt0_shared_callee(double %x)
485+
%add = fadd double %call, 4.200000e+01
486+
ret double %add;
487+
}
488+
489+
define double @zt0_shared_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
490+
; CHECK-COMMON-LABEL: zt0_shared_caller_to_normal_callee:
491+
; CHECK-COMMON: // %bb.0: // %entry
492+
; CHECK-COMMON-NEXT: sub sp, sp, #80
493+
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
494+
; CHECK-COMMON-NEXT: mov x19, sp
495+
; CHECK-COMMON-NEXT: str zt0, [x19]
496+
; CHECK-COMMON-NEXT: smstop za
497+
; CHECK-COMMON-NEXT: bl normal_callee
498+
; CHECK-COMMON-NEXT: smstart za
499+
; CHECK-COMMON-NEXT: ldr zt0, [x19]
500+
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
501+
; CHECK-COMMON-NEXT: fmov d1, x8
502+
; CHECK-COMMON-NEXT: fadd d0, d0, d1
503+
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
504+
; CHECK-COMMON-NEXT: add sp, sp, #80
505+
; CHECK-COMMON-NEXT: ret
506+
entry:
507+
%call = call double @normal_callee(double %x)
508+
%add = fadd double %call, 4.200000e+01
509+
ret double %add;
510+
}

0 commit comments

Comments
 (0)