-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Change scope of resource usage info symbols #114810
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Janek van Oirschot (JanekvO) ChangesChange scope of resource usage info MC symbols such that they don't end up in the object file (i.e., as I have tried putting them into their own sections and have the sections omitted but since they're symbolic representations they don't actually end up in any of the sections (let me know if I'm missing something obvious with the section approach, however). Patch is 80.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114810.diff 14 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index da0397fa20bd1b..6d36b516b557ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -16,6 +16,7 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
@@ -24,7 +25,9 @@ using namespace llvm;
MCSymbol *MCResourceInfo::getSymbol(StringRef FuncName, ResourceInfoKind RIK,
MCContext &OutContext) {
auto GOCS = [FuncName, &OutContext](StringRef Suffix) {
- return OutContext.getOrCreateSymbol(FuncName + Twine(Suffix));
+ return OutContext.getOrCreateSymbol(
+ Twine(OutContext.getAsmInfo()->getPrivateGlobalPrefix()) + FuncName +
+ Twine(Suffix));
};
switch (RIK) {
case RIK_NumVGPR:
@@ -80,15 +83,21 @@ void MCResourceInfo::finalize(MCContext &OutContext) {
}
MCSymbol *MCResourceInfo::getMaxVGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_vgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_vgpr");
}
MCSymbol *MCResourceInfo::getMaxAGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_agpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_agpr");
}
MCSymbol *MCResourceInfo::getMaxSGPRSymbol(MCContext &OutContext) {
- return OutContext.getOrCreateSymbol("amdgpu.max_num_sgpr");
+ StringRef PrivatePrefix = OutContext.getAsmInfo()->getPrivateGlobalPrefix();
+ return OutContext.getOrCreateSymbol(Twine(PrivatePrefix) +
+ "amdgpu.max_num_sgpr");
}
void MCResourceInfo::assignResourceInfoExpr(
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
index 0e16ea10c019ac..b1dd4fecab2cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-register-count.ll
@@ -154,28 +154,28 @@ bb:
declare void @undef_func()
; GCN-LABEL: {{^}}kernel_call_undef_func:
-; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; GCN: .set kernel_call_undef_func.num_vgpr, max(32, amdgpu.max_num_vgpr)
-; GCN: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; GCN: NumVgprs: kernel_call_undef_func.num_vgpr
-; GCN: NumAgprs: kernel_call_undef_func.num_agpr
-; GCN: TotalNumVgprs: totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr)
-; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
-; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
-; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0)
-; GFX90A: AccumOffset: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
-; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(kernel_call_undef_func.numbered_sgpr+(extrasgprs(kernel_call_undef_func.uses_vcc, kernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(kernel_call_undef_func.num_agpr, kernel_call_undef_func.num_vgpr), 1, 0))
-; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, kernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
+; GCN: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; GCN: .set .Lkernel_call_undef_func.num_vgpr, max(32, .Lamdgpu.max_num_vgpr)
+; GCN: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; GCN: NumVgprs: .Lkernel_call_undef_func.num_vgpr
+; GCN: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; GCN: TotalNumVgprs: totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr)
+; GFX908: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 4))/4)-1
+; GFX90A: VGPRBlocks: ((alignto(max(max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0), 1), 8))/8)-1
+; GCN: NumVGPRsForWavesPerEU: max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0)
+; GFX90A: AccumOffset: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)+1)*4
+; GFX908: Occupancy: occupancy(10, 4, 256, 8, 10, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: Occupancy: occupancy(8, 8, 512, 8, 8, max(.Lkernel_call_undef_func.numbered_sgpr+(extrasgprs(.Lkernel_call_undef_func.uses_vcc, .Lkernel_call_undef_func.uses_flat_scratch, 1)), 1, 0), max(totalnumvgprs(.Lkernel_call_undef_func.num_agpr, .Lkernel_call_undef_func.num_vgpr), 1, 0))
+; GFX90A: COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: ((((alignto(max(1, .Lkernel_call_undef_func.num_vgpr), 4))/4)-1)&(~65536))&63
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
ret void
}
-; GCN: .set amdgpu.max_num_vgpr, 32
-; GCN-NEXT: .set amdgpu.max_num_agpr, 32
-; GCN-NEXT: .set amdgpu.max_num_sgpr, 34
+; GCN: .set .Lamdgpu.max_num_vgpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_agpr, 32
+; GCN-NEXT: .set .Lamdgpu.max_num_sgpr, 34
attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
index 8f4cb364751d88..15284ad45a9261 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-metadata-agpr-register-count.ll
@@ -60,9 +60,9 @@ bb:
declare void @undef_func()
; CHECK: .type kernel_call_undef_func
-; CHECK: .set kernel_call_undef_func.num_agpr, max(0, amdgpu.max_num_agpr)
-; CHECK: NumAgprs: kernel_call_undef_func.num_agpr
-; CHECK: .set amdgpu.max_num_agpr, 32
+; CHECK: .set .Lkernel_call_undef_func.num_agpr, max(0, .Lamdgpu.max_num_agpr)
+; CHECK: NumAgprs: .Lkernel_call_undef_func.num_agpr
+; CHECK: .set .Lamdgpu.max_num_agpr, 32
define amdgpu_kernel void @kernel_call_undef_func() #0 {
bb:
call void @undef_func()
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
index d45e116beb4e3e..374fd32ec5997f 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-vgpr-limit.ll
@@ -547,20 +547,20 @@ define amdgpu_kernel void @f256() #256 {
attributes #256 = { nounwind "amdgpu-flat-work-group-size"="256,256" }
; GCN-LABEL: {{^}}f512:
-; GFX9: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX90A: .set f512.num_agpr, max(128, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f512.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f512.num_vgpr, max(256, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f512.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f512.num_vgpr
-; GFX90A: NumAgprs: f512.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f512.num_agpr, f512.num_vgpr)
+; GFX9: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf512.num_agpr, max(128, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf512.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf512.num_vgpr, max(256, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf512.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf512.num_vgpr
+; GFX90A: NumAgprs: .Lf512.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf512.num_agpr, .Lf512.num_vgpr)
define amdgpu_kernel void @f512() #512 {
call void @foo()
call void @use256vgprs()
@@ -569,20 +569,20 @@ define amdgpu_kernel void @f512() #512 {
attributes #512 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
; GCN-LABEL: {{^}}f1024:
-; GFX9: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX90A: .set f1024.num_agpr, max(64, amdgpu.max_num_agpr)
-; GFX10WGP-WAVE32: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10WGP-WAVE64: .set f1024.num_vgpr, max(128, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE32: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX10CU-WAVE64: .set f1024.num_vgpr, max(64, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE32: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11WGP-WAVE64: .set f1024.num_vgpr, max(192, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE32: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GFX11CU-WAVE64: .set f1024.num_vgpr, max(96, amdgpu.max_num_vgpr)
-; GCN: NumVgprs: f1024.num_vgpr
-; GFX90A: NumAgprs: f1024.num_agpr
-; GFX90A: TotalNumVgprs: totalnumvgprs(f1024.num_agpr, f1024.num_vgpr)
+; GFX9: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX90A: .set .Lf1024.num_agpr, max(64, .Lamdgpu.max_num_agpr)
+; GFX10WGP-WAVE32: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10WGP-WAVE64: .set .Lf1024.num_vgpr, max(128, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE32: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX10CU-WAVE64: .set .Lf1024.num_vgpr, max(64, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE32: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11WGP-WAVE64: .set .Lf1024.num_vgpr, max(192, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE32: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GFX11CU-WAVE64: .set .Lf1024.num_vgpr, max(96, .Lamdgpu.max_num_vgpr)
+; GCN: NumVgprs: .Lf1024.num_vgpr
+; GFX90A: NumAgprs: .Lf1024.num_agpr
+; GFX90A: TotalNumVgprs: totalnumvgprs(.Lf1024.num_agpr, .Lf1024.num_vgpr)
define amdgpu_kernel void @f1024() #1024 {
call void @foo()
call void @use256vgprs()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
index e8898d6a7001cc..8bc8a7182ff727 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage-agpr.ll
@@ -8,13 +8,13 @@
@alias = hidden alias void (), ptr @aliasee_default
; ALL-LABEL: {{^}}kernel:
-; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel.num_agpr, kernel.num_vgpr), 1, 0)
-; ALL-NEXT: .amdhsa_next_free_sgpr (max(kernel.numbered_sgpr+(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel.uses_vcc, kernel.uses_flat_scratch, 1))
-; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, kernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
+; ALL: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel.num_agpr, .Lkernel.num_vgpr), 1, 0)
+; ALL-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel.numbered_sgpr+(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel.uses_vcc, .Lkernel.uses_flat_scratch, 1))
+; GFX90A-NEXT: .amdhsa_accum_offset ((((((alignto(max(1, .Lkernel.num_vgpr), 4))/4)-1)&(~65536))&63)+1)*4
-; ALL: .set kernel.num_vgpr, max(32, aliasee_default.num_vgpr)
-; ALL-NEXT: .set kernel.num_agpr, max(0, aliasee_default.num_agpr)
-; ALL-NEXT: .set kernel.numbered_sgpr, max(33, aliasee_default.numbered_sgpr)
+; ALL: .set .Lkernel.num_vgpr, max(32, .Laliasee_default.num_vgpr)
+; ALL-NEXT: .set .Lkernel.num_agpr, max(0, .Laliasee_default.num_agpr)
+; ALL-NEXT: .set .Lkernel.numbered_sgpr, max(33, .Laliasee_default.numbered_sgpr)
define amdgpu_kernel void @kernel() #0 {
bb:
call void @alias() #2
@@ -26,9 +26,9 @@ bb:
call void asm sideeffect "; clobber a26 ", "~{a26}"()
ret void
}
-; ALL: .set aliasee_default.num_vgpr, 0
-; ALL-NEXT: .set aliasee_default.num_agpr, 27
-; ALL-NEXT: .set aliasee_default.numbered_sgpr, 32
+; ALL: .set .Laliasee_default.num_vgpr, 0
+; ALL-NEXT: .set .Laliasee_default.num_agpr, 27
+; ALL-NEXT: .set .Laliasee_default.numbered_sgpr, 32
attributes #0 = { noinline norecurse nounwind optnone }
attributes #1 = { noinline norecurse nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
index a01268625cedbd..deb0973d37e3dc 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage0.ll
@@ -7,18 +7,18 @@
@alias0 = hidden alias void (), ptr @aliasee_default_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel0:
-; CHECK: .set kernel0.num_vgpr, max(32, aliasee_default_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel0.num_agpr, max(0, aliasee_default_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel0.numbered_sgpr, max(33, aliasee_default_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set .Lkernel0.num_vgpr, max(32, .Laliasee_default_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set .Lkernel0.num_agpr, max(0, .Laliasee_default_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set .Lkernel0.numbered_sgpr, max(33, .Laliasee_default_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel0() #0 {
bb:
call void @alias0() #2
ret void
}
-; CHECK: .set aliasee_default_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_default_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_default_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_default_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
index 86defe3ba7ec08..eeaf04aacead08 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage1.ll
@@ -9,12 +9,12 @@
; The parent kernel has a higher VGPR usage than the possible callees.
; CHECK-LABEL: {{^}}kernel1:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel1.num_agpr, kernel1.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel1.numbered_sgpr+(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel1.uses_vcc, kernel1.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel1.num_agpr, .Lkernel1.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel1.numbered_sgpr+(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel1.uses_vcc, .Lkernel1.uses_flat_scratch, 1))
-; CHECK: .set kernel1.num_vgpr, max(41, aliasee_vgpr32_sgpr76.num_vgpr)
-; CHECK-NEXT: .set kernel1.num_agpr, max(0, aliasee_vgpr32_sgpr76.num_agpr)
-; CHECK-NEXT: .set kernel1.numbered_sgpr, max(33, aliasee_vgpr32_sgpr76.numbered_sgpr)
+; CHECK: .set .Lkernel1.num_vgpr, max(41, .Laliasee_vgpr32_sgpr76.num_vgpr)
+; CHECK-NEXT: .set .Lkernel1.num_agpr, max(0, .Laliasee_vgpr32_sgpr76.num_agpr)
+; CHECK-NEXT: .set .Lkernel1.numbered_sgpr, max(33, .Laliasee_vgpr32_sgpr76.numbered_sgpr)
define amdgpu_kernel void @kernel1() #0 {
bb:
call void asm sideeffect "; clobber v40 ", "~{v40}"()
@@ -22,9 +22,9 @@ bb:
ret void
}
-; CHECK: .set aliasee_vgpr32_sgpr76.num_vgpr, 27
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr32_sgpr76.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr32_sgpr76.num_vgpr, 27
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr32_sgpr76.numbered_sgpr, 32
define internal void @aliasee_vgpr32_sgpr76() #1 {
bb:
call void asm sideeffect "; clobber v26 ", "~{v26}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
index 6b1fbd9b6e16a2..f8fb4a79768b73 100644
--- a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-alias-register-usage2.ll
@@ -7,21 +7,21 @@
@alias2 = hidden alias void (), ptr @aliasee_vgpr64_sgpr102
; CHECK-LABEL: {{^}}kernel2:
-; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(kernel2.num_agpr, kernel2.num_vgpr), 1, 0)
-; CHECK-NEXT: .amdhsa_next_free_sgpr (max(kernel2.numbered_sgpr+(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(kernel2.uses_vcc, kernel2.uses_flat_scratch, 1))
+; CHECK: .amdhsa_next_free_vgpr max(totalnumvgprs(.Lkernel2.num_agpr, .Lkernel2.num_vgpr), 1, 0)
+; CHECK-NEXT: .amdhsa_next_free_sgpr (max(.Lkernel2.numbered_sgpr+(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1)), 1, 0))-(extrasgprs(.Lkernel2.uses_vcc, .Lkernel2.uses_flat_scratch, 1))
-; CHECK: .set kernel2.num_vgpr, max(32, aliasee_vgpr64_sgpr102.num_vgpr)
-; CHECK-NEXT: .set kernel2.num_agpr, max(0, aliasee_vgpr64_sgpr102.num_agpr)
-; CHECK-NEXT: .set kernel2.numbered_sgpr, max(33, aliasee_vgpr64_sgpr102.numbered_sgpr)
+; CHECK: .set .Lkernel2.num_vgpr, max(32, .Laliasee_vgpr64_sgpr102.num_vgpr)
+; CHECK-NEXT: .set .Lkernel2.num_agpr, max(0, .Laliasee_vgpr64_sgpr102.num_agpr)
+; CHECK-NEXT: .set .Lkernel2.numbered_sgpr, max(33, .Laliasee_vgpr64_sgpr102.numbered_sgpr)
define amdgpu_kernel void @kernel2() #0 {
bb:
call void @alias2() #2
ret void
}
-; CHECK: .set aliasee_vgpr64_sgpr102.num_vgpr, 53
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.num_agpr, 0
-; CHECK-NEXT: .set aliasee_vgpr64_sgpr102.numbered_sgpr, 32
+; CHECK: .set .Laliasee_vgpr64_sgpr102.num_vgpr, 53
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.num_agpr, 0
+; CHECK-NEXT: .set .Laliasee_vgpr64_sgpr102.numbered_sgpr, 32
define internal void @aliasee_vgpr64_sgpr102() #1 {
bb:
call void asm sideeffect "; clobber v52 ", "~{v52}"()
diff --git a/llvm/test/CodeGen/AMDGPU/call-alias-register-usage3.ll b/llvm/test/CodeGen/AMDGPU/call-alias-regis...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This would need to consider the visibility of the functions. If they are unconditionally private, you can't use this for object linking
I should add that none of the resource information symbols are currently (i.e., at the time of writing) used for object linking and only reproduces the prior functionality of resolving module scope resource information. I guess whether these are to be made local or not depends on whether the object linking solution with resource info is going to be immediately bespoke, or will use these symbols directly (either way I expect these symbols to be used, whether directly or through some bespoke resource info linking structure/ABI). |
I'm wondering what the best way to handle this would be. My guess would be a struct of metadata per function, then we emit that something in the ELF (Maybe a separate section). Then we can probably re-use the profile call-graph support to just create edges between functions and look up their metadata. |
This is how I assume it would work |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should probably consolidate these fields into one struct for whatever the final ABI ends up being.
In the meantime I think we should just follow along with the visibility of the function
…on't end up in the object file
4ffe605
to
78c6218
Compare
@@ -357,6 +357,7 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) { | |||
} | |||
|
|||
void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) { | |||
bool isLocal = F.hasLocalLinkage(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sink below the early return to where it's used, and capitalize
Change scope of resource usage info MC symbols such that they don't end up in the object file (i.e., as
*ABS*
).I have tried putting them into their own sections and have the sections omitted but since they're symbolic representations they don't actually end up in any of the sections (let me know if I'm missing something obvious with the section approach, however).