Skip to content

AMDGPU: Do not infer implicit inputs for !nocallback intrinsics #131759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,

enum ImplicitArgumentMask {
NOT_IMPLICIT_INPUT = 0,
UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
NOT_IMPLICIT_INPUT
};

#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
Expand Down Expand Up @@ -118,7 +119,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
return NOT_IMPLICIT_INPUT;
return UNKNOWN_INTRINSIC;
}
}

Expand Down Expand Up @@ -522,6 +523,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);

if (AttrMask == UNKNOWN_INTRINSIC) {
// Assume not-nocallback intrinsics may invoke a function which accesses
// implicit arguments.
//
// FIXME: This isn't really the correct check. We want to ensure it
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs an iterative approach, similar to other AAs, that then propagates the "attribute".

// isn't calling any function that may use implicit arguments regardless
// of whether it's internal to the module or not.
//
// TODO: Ignoring callsite attributes.
if (!Callee->hasFnAttribute(Attribute::NoCallback))
return indicatePessimisticFixpoint();
continue;
}

if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
Expand Down Expand Up @@ -1282,8 +1298,11 @@ struct AAAMDGPUNoAGPR

// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
if (Callee->isIntrinsic())
return true;
if (Callee->isIntrinsic()) {
// Assume !nocallback intrinsics may call a function which requires
// AGPRs.
return CB.hasFnAttr(Attribute::NoCallback);
}

// TODO: Handle callsite attributes
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s

; Make sure we do not infer anything about implicit inputs through an
; intrinsic call which is not nocallback.

declare zeroext i32 @return_i32()

; Calls llvm.experimental.gc.statepoint, passing @return_i32 as a pointer
; argument. The statepoint declaration in this file carries no attribute group
; (so no nocallback), while gc.result is declared nocallback via #0.
; The expected attribute set for this function (ATTR0) contains only
; "target-cpu" and "uniform-work-group-size": no amdgpu-no-* attribute and no
; "amdgpu-agpr-alloc" are checked for.
define i32 @test_i32_return() gc "statepoint-example" {
; CHECK-LABEL: define i32 @test_i32_return(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
; CHECK-NEXT: ret i32 [[CALL1]]
;
entry:
%safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
%call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
ret i32 %call1
}

declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
declare i32 @llvm.experimental.gc.result.i32(token) #0

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
;.
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
;.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s

; Make sure we infer that no implicit inputs are used when calling certain intrinsics.

; Single call to llvm.fake.use on the incoming argument.
; The expected attribute set (ATTR0) holds only "target-cpu" and
; "uniform-work-group-size"; no amdgpu-no-* attributes are checked for.
; NOTE(review): this differs from the other functions in this file, presumably
; because llvm.fake.use is not nocallback -- confirm against the intrinsic's
; declared attributes.
define void @use_fake_use(i32 %arg) {
; CHECK-LABEL: define void @use_fake_use(
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
; CHECK-NEXT: ret void
;
call void (...) @llvm.fake.use(i32 %arg)
ret void
}

; Single call to llvm.donothing.
; The expected attribute set (ATTR1) contains "amdgpu-agpr-alloc"="0" together
; with the full list of amdgpu-no-* implicit-input attributes.
define void @use_donothing() {
; CHECK-LABEL: define void @use_donothing(
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: ret void
;
call void @llvm.donothing()
ret void
}

; Single call to llvm.assume on the incoming i1 argument.
; Expected to share attribute set ATTR1 with @use_donothing, i.e.
; "amdgpu-agpr-alloc"="0" plus the full list of amdgpu-no-* attributes.
define void @use_assume(i1 %arg) {
; CHECK-LABEL: define void @use_assume(
; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
; CHECK-NEXT: ret void
;
call void @llvm.assume(i1 %arg)
ret void
}

; Single call to llvm.trap.
; The expected attribute set (ATTR2) lists the amdgpu-no-* implicit-input
; attributes but, unlike ATTR1, does not include "amdgpu-agpr-alloc"="0".
define void @use_trap() {
; CHECK-LABEL: define void @use_trap(
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
call void @llvm.trap()
ret void
}

; Single call to llvm.debugtrap.
; Expected to share attribute set ATTR2 with @use_trap: the amdgpu-no-*
; attributes are present, "amdgpu-agpr-alloc"="0" is not.
define void @use_debugtrap() {
; CHECK-LABEL: define void @use_debugtrap(
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void @llvm.debugtrap()
; CHECK-NEXT: ret void
;
call void @llvm.debugtrap()
ret void
}

; Single call to llvm.ubsantrap with an immediate i8 0 operand.
; Expected to share attribute set ATTR2 with @use_trap: the amdgpu-no-*
; attributes are present, "amdgpu-agpr-alloc"="0" is not.
define void @use_ubsantrap() {
; CHECK-LABEL: define void @use_ubsantrap(
; CHECK-SAME: ) #[[ATTR2]] {
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
; CHECK-NEXT: ret void
;
call void @llvm.ubsantrap(i8 0)
ret void
}

;.
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { cold noreturn nounwind "target-cpu"="gfx90a" }
;.
Loading