Skip to content

[Attributor][AMDGPU] Enable AAIndirectCallInfo for AMDAttributor #100952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions llvm/include/llvm/Transforms/IPO/Attributor.h
Original file line number Diff line number Diff line change
Expand Up @@ -1448,7 +1448,7 @@ struct AttributorConfig {
/// Callback function to determine if an indirect call targets should be made
/// direct call targets (with an if-cascade).
std::function<bool(Attributor &A, const AbstractAttribute &AA, CallBase &CB,
Function &AssummedCallee)>
Function &AssumedCallee, unsigned NumAssumedCallees)>
IndirectCalleeSpecializationCallback = nullptr;

/// Helper to update an underlying call graph and to delete functions.
Expand Down Expand Up @@ -1718,10 +1718,11 @@ struct Attributor {
/// Return true if we should specialize the call site \b CB for the potential
/// callee \p Fn.
bool shouldSpecializeCallSiteForCallee(const AbstractAttribute &AA,
CallBase &CB, Function &Callee) {
CallBase &CB, Function &Callee,
unsigned NumAssumedCallees) {
return Configuration.IndirectCalleeSpecializationCallback
? Configuration.IndirectCalleeSpecializationCallback(*this, AA,
CB, Callee)
? Configuration.IndirectCalleeSpecializationCallback(
*this, AA, CB, Callee, NumAssumedCallees)
: true;
}

Expand Down
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
Expand Down Expand Up @@ -1038,12 +1039,24 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {
&AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
&AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,
&AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID});
&AAUnderlyingObjects::ID, &AAIndirectCallInfo::ID, &AAInstanceInfo::ID});

AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
AC.IndirectCalleeSpecializationCallback =
[&TM](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
Function &Callee, unsigned NumAssumedCallees) {
if (AMDGPU::isEntryFunctionCC(Callee.getCallingConv()))
return false;
// Singleton functions can be specialized.
if (NumAssumedCallees == 1)
return true;
// Otherwise specialize uniform values.
const auto &TTI = TM.getTargetTransformInfo(*CB.getCaller());
return TTI.isAlwaysUniform(CB.getCalledOperand());
};
AC.IPOAmendableCB = [](const Function &F) {
return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
};
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/IPO/Attributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3836,7 +3836,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache,
if (MaxSpecializationPerCB.getNumOccurrences()) {
AC.IndirectCalleeSpecializationCallback =
[&](Attributor &, const AbstractAttribute &AA, CallBase &CB,
Function &Callee) {
Function &Callee, unsigned) {
if (MaxSpecializationPerCB == 0)
return false;
auto &Set = IndirectCalleeTrackingMap[&CB];
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/IPO/AttributorAttributes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12347,7 +12347,8 @@ struct AAIndirectCallInfoCallSite : public AAIndirectCallInfo {
SmallVector<Function *, 8> SkippedAssumedCallees;
SmallVector<std::pair<CallInst *, Instruction *>> NewCalls;
for (Function *NewCallee : AssumedCallees) {
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee)) {
if (!A.shouldSpecializeCallSiteForCallee(*this, *CB, *NewCallee,
AssumedCallees.size())) {
SkippedAssumedCallees.push_back(NewCallee);
SpecializedForAllCallees = false;
continue;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define internal void @direct() {
; CHECK-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; CHECK-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; CHECK-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; CHECK-NEXT: call void [[FP]]()
; CHECK-NEXT: call void @indirect()
; CHECK-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
Expand All @@ -36,5 +36,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
}
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
;.
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca ptr, align 8, addrspace(5)
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr addrspace(5) [[FPTR]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
%fptr = alloca ptr, addrspace(5)
Expand All @@ -43,5 +43,5 @@ attributes #0 = { "amdgpu-no-dispatch-id" }
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-no-dispatch-id" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
66 changes: 66 additions & 0 deletions llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s | FileCheck --check-prefixes=CHECK,OW %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -attributor-assume-closed-world=1 %s | FileCheck --check-prefixes=CHECK,CW %s

target datalayout = "A5"

@G = global i32 0, align 4

;.
; CHECK: @G = global i32 0, align 4
;.
define void @bar() {
; CHECK-LABEL: define {{[^@]+}}@bar
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: store i32 1, ptr @G, align 4
; CHECK-NEXT: ret void
;
entry:
store i32 1, ptr @G, align 4
ret void
}

define ptr @helper() {
; CHECK-LABEL: define {{[^@]+}}@helper
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret ptr @bar
;
entry:
ret ptr @bar
}

define amdgpu_kernel void @foo(ptr noundef %fp) {
; OW-LABEL: define {{[^@]+}}@foo
; OW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; OW-NEXT: entry:
; OW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; OW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; OW-NEXT: call void [[FP]]()
; OW-NEXT: ret void
;
; CW-LABEL: define {{[^@]+}}@foo
; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
; CW-NEXT: entry:
; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
; CW-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
; CW-NEXT: call void @bar()
; CW-NEXT: ret void
;
entry:
%fp.addr = alloca ptr, addrspace(5)
store ptr %fp, ptr addrspace(5) %fp.addr
%load = load ptr, ptr addrspace(5) %fp.addr
call void %load()
ret void
}

;.
; OW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; CW: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; CW: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
14 changes: 9 additions & 5 deletions llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
; ATTRIBUTOR_GCN-NEXT: [[FPTR_CAST:%.*]] = addrspacecast ptr addrspace(5) [[FPTR]] to ptr
; ATTRIBUTOR_GCN-NEXT: store ptr @indirect, ptr [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: [[FP:%.*]] = load ptr, ptr [[FPTR_CAST]], align 8
; ATTRIBUTOR_GCN-NEXT: call void [[FP]]()
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
; ATTRIBUTOR_GCN-NEXT: ret void
;
; GFX9-LABEL: test_simple_indirect_call:
Expand Down Expand Up @@ -75,12 +75,16 @@ define amdgpu_kernel void @test_simple_indirect_call() {
ret void
}


!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-calls" "amdgpu-stack-objects" }
;.
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; AKF_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; ATTRIBUTOR_GCN: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
Loading