Skip to content

Commit 167fd5a

Browse files
committed
AMDGPU: Respect amdgpu-no-agpr in functions and with calls
Remove the MIR scan that detected whether AGPRs are used, along with the special case for callable functions. This behavior was confusing and could not be overridden. The amdgpu-no-agpr attribute was intended to avoid this imprecise heuristic for deciding how many AGPRs to allocate. Making the scan interact with the pending amdgpu-num-agpr replacement for amdgpu-no-agpr would also have been too confusing. Also adds an xfail-ish test, for a case I ran into, where the register allocator asserts after allocation fails. Future work should reintroduce a more refined MIR scan that estimates AGPR pressure in order to decide how to split the register file between AGPRs and VGPRs.
1 parent af64f0a commit 167fd5a

8 files changed

+655
-64
lines changed

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 6 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
6464
}
6565

6666
MayNeedAGPRs = ST.hasMAIInsts();
67+
if (ST.hasGFX90AInsts() &&
68+
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
69+
!mayUseAGPRs(F))
70+
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
6771

6872
if (AMDGPU::isChainCC(CC)) {
6973
// Chain functions don't receive an SP from their caller, but are free to
@@ -98,13 +102,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
98102
ImplicitArgPtr = true;
99103
} else {
100104
ImplicitArgPtr = false;
101-
MaxKernArgAlign = std::max(ST.getAlignmentForImplicitArgPtr(),
102-
MaxKernArgAlign);
103-
104-
if (ST.hasGFX90AInsts() &&
105-
ST.getMaxNumVGPRs(F) <= AMDGPU::VGPR_32RegClass.getNumRegs() &&
106-
!mayUseAGPRs(F))
107-
MayNeedAGPRs = false; // We will select all MAI with VGPR operands.
105+
MaxKernArgAlign =
106+
std::max(ST.getAlignmentForImplicitArgPtr(), MaxKernArgAlign);
108107
}
109108

110109
if (!AMDGPU::isGraphics(CC) ||
@@ -783,44 +782,3 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
783782
bool SIMachineFunctionInfo::mayUseAGPRs(const Function &F) const {
784783
return !F.hasFnAttribute("amdgpu-no-agpr");
785784
}
786-
787-
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
788-
if (UsesAGPRs)
789-
return *UsesAGPRs;
790-
791-
if (!mayNeedAGPRs()) {
792-
UsesAGPRs = false;
793-
return false;
794-
}
795-
796-
if (!AMDGPU::isEntryFunctionCC(MF.getFunction().getCallingConv()) ||
797-
MF.getFrameInfo().hasCalls()) {
798-
UsesAGPRs = true;
799-
return true;
800-
}
801-
802-
const MachineRegisterInfo &MRI = MF.getRegInfo();
803-
804-
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
805-
const Register Reg = Register::index2VirtReg(I);
806-
const TargetRegisterClass *RC = MRI.getRegClassOrNull(Reg);
807-
if (RC && SIRegisterInfo::isAGPRClass(RC)) {
808-
UsesAGPRs = true;
809-
return true;
810-
}
811-
if (!RC && !MRI.use_empty(Reg) && MRI.getType(Reg).isValid()) {
812-
// Defer caching UsesAGPRs, function might not yet been regbank selected.
813-
return true;
814-
}
815-
}
816-
817-
for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
818-
if (MRI.isPhysRegUsed(Reg)) {
819-
UsesAGPRs = true;
820-
return true;
821-
}
822-
}
823-
824-
UsesAGPRs = false;
825-
return false;
826-
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -494,8 +494,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
494494
// scheduler stage.
495495
unsigned MaxMemoryClusterDWords = DefaultMemoryClusterDWordsLimit;
496496

497-
mutable std::optional<bool> UsesAGPRs;
498-
499497
MCPhysReg getNextUserSGPR() const;
500498

501499
MCPhysReg getNextSystemSGPR() const;
@@ -1126,9 +1124,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
11261124
// has a call which may use it.
11271125
bool mayUseAGPRs(const Function &F) const;
11281126

1129-
// \returns true if a function needs or may need AGPRs.
1130-
bool usesAGPRs(const MachineFunction &MF) const;
1131-
11321127
/// \returns Default/requested number of work groups for this function.
11331128
SmallVector<unsigned> getMaxNumWorkGroups() const { return MaxNumWorkGroups; }
11341129

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ SIRegisterInfo::getMaxNumVectorRegs(const MachineFunction &MF) const {
585585
// TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
586586
// register file accordingly.
587587
if (ST.hasGFX90AInsts()) {
588-
if (MFI->usesAGPRs(MF)) {
588+
if (MFI->mayNeedAGPRs()) {
589589
MaxNumVGPRs /= 2;
590590
MaxNumAGPRs = MaxNumVGPRs;
591591
} else {
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
; REQUIRES: asserts
2+
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=null %s 2>&1 | FileCheck -check-prefix=CRASH %s
3+
4+
; CRASH: error: <unknown>:0:0: no registers from class available to allocate in function 'no_free_vgprs_at_agpr_to_agpr_copy'
5+
; CRASH: Assertion failed: (valid() && "Cannot access invalid iterator")
6+
7+
define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
8+
%asm = call { <32 x i32>, <16 x float> } asm sideeffect "; def $0 $1", "=${v[0:31]},=${a[0:15]}"()
9+
%vgpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 0
10+
%agpr0 = extractvalue { <32 x i32>, <16 x float> } %asm, 1
11+
%mfma = call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float %v0, float %v1, <16 x float> %agpr0, i32 0, i32 0, i32 0)
12+
%agpr1 = call i32 asm sideeffect "; copy ", "={a1},a,~{a[0:15]}"(<16 x float> %agpr0)
13+
%agpr2 = call i32 asm sideeffect "; copy ", "={a2},a,{a[0:15]}"(i32 %agpr1, <16 x float> %mfma)
14+
call void asm sideeffect "; use $0 $1", "{a3},{v[0:31]}"(i32 %agpr2, <32 x i32> %vgpr0)
15+
ret void
16+
}
17+
18+
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
19+
declare noundef i32 @llvm.amdgcn.workitem.id.x() #2
20+
21+
attributes #0 = { "amdgpu-no-agpr" "amdgpu-waves-per-eu"="6,6" }
22+
attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
23+
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
240240
}
241241

242242
; Check that we do make use of v32 if there are no AGPRs present in the function
243-
define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
243+
define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #5 {
244244
; GFX908-LABEL: no_agpr_no_reserve:
245245
; GFX908: ; %bb.0:
246246
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
@@ -1144,5 +1144,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() #2
11441144
attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
11451145
attributes #1 = { convergent nounwind readnone willreturn }
11461146
attributes #2 = { nounwind readnone willreturn }
1147-
attributes #3 = { "amdgpu-waves-per-eu"="7,7" }
1147+
attributes #3 = { "amdgpu-waves-per-eu"="7,7" "amdgpu-no-agpr" }
11481148
attributes #4 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-flat-work-group-size"="1024,1024" }
1149+
attributes #5 = { "amdgpu-waves-per-eu"="6,6" "amdgpu-no-agpr" }

llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,20 @@ bb3:
9494
ret void
9595
}
9696

97-
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry:
97+
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_noagpr:
98+
; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
99+
; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
100+
define void @test_mfma_f32_32x32x1f32_nonentry_noagpr(ptr addrspace(1) %arg) #0 {
101+
bb:
102+
%in.1 = load <32 x float>, ptr addrspace(1) %arg
103+
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
104+
store <32 x float> %mai.1, ptr addrspace(1) %arg
105+
ret void
106+
}
107+
108+
; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_nonentry_with_agpr:
98109
; GCN: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
99-
define void @test_mfma_f32_32x32x1f32_nonentry(ptr addrspace(1) %arg) #0 {
110+
define void @test_mfma_f32_32x32x1f32_nonentry_with_agpr(ptr addrspace(1) %arg) #3 {
100111
bb:
101112
%in.1 = load <32 x float>, ptr addrspace(1) %arg
102113
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
@@ -109,3 +120,4 @@ declare void @foo()
109120
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" "amdgpu-no-agpr" }
110121
attributes #1 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }
111122
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-agpr" }
123+
attributes #3 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="2" }

llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
ret void
77
}
88

9-
attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
9+
attributes #0 = { "amdgpu-waves-per-eu"="8,8" "amdgpu-no-agpr" }
1010
...
1111

1212
---

0 commit comments

Comments
 (0)