Skip to content

Commit 4c171fb

Browse files
committed
AMDGPU: Do not infer implicit inputs for !nocallback intrinsics
This isn't really the right check; what we want to know is that the intrinsic does not perform a true function call to any code (whether in the module or not). However, nocallback appears to be the closest thing to this property that we have right now. Also do the same when inferring no-AGPR usage.
1 parent 21f0717 commit 4c171fb

File tree

3 files changed

+104
-11
lines changed

3 files changed

+104
-11
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,10 @@ enum ImplicitArgumentPositions {
4848
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
4949

5050
enum ImplicitArgumentMask {
51-
NOT_IMPLICIT_INPUT = 0,
51+
UNKNOWN_INTRINSIC = 0,
5252
#include "AMDGPUAttributes.def"
53-
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
53+
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
54+
NOT_IMPLICIT_INPUT
5455
};
5556

5657
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -118,7 +119,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
118119
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
119120
return QUEUE_PTR;
120121
default:
121-
return NOT_IMPLICIT_INPUT;
122+
return UNKNOWN_INTRINSIC;
122123
}
123124
}
124125

@@ -522,6 +523,21 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
522523
ImplicitArgumentMask AttrMask =
523524
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
524525
HasApertureRegs, SupportsGetDoorbellID, COV);
526+
527+
if (AttrMask == UNKNOWN_INTRINSIC) {
528+
// Assume not-nocallback intrinsics may invoke a function which accesses
529+
// implicit arguments.
530+
//
531+
// FIXME: This isn't really the correct check. We want to ensure it
532+
// isn't calling any function that may use implicit arguments regardless
533+
// of whether it's internal to the module or not.
534+
//
535+
// TODO: Ignoring callsite attributes.
536+
if (!Callee->hasFnAttribute(Attribute::NoCallback))
537+
return indicatePessimisticFixpoint();
538+
continue;
539+
}
540+
525541
if (AttrMask != NOT_IMPLICIT_INPUT) {
526542
if ((IsNonEntryFunc || !NonKernelOnly))
527543
removeAssumedBits(AttrMask);
@@ -1282,8 +1298,11 @@ struct AAAMDGPUNoAGPR
12821298

12831299
// Some intrinsics may use AGPRs, but if we have a choice, we are not
12841300
// required to use AGPRs.
1285-
if (Callee->isIntrinsic())
1286-
return true;
1301+
if (Callee->isIntrinsic()) {
1302+
// Assume !nocallback intrinsics may call a function which requires
1303+
// AGPRs.
1304+
return CB.hasFnAttr(Attribute::NoCallback);
1305+
}
12871306

12881307
// TODO: Handle callsite attributes
12891308
const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
22
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
33

44
; Make sure we do not infer anything about implicit inputs through an
@@ -8,7 +8,7 @@ declare zeroext i32 @return_i32()
88

99
define i32 @test_i32_return() gc "statepoint-example" {
1010
; CHECK-LABEL: define i32 @test_i32_return(
11-
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] gc "statepoint-example" {
11+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
1212
; CHECK-NEXT: [[ENTRY:.*:]]
1313
; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
1414
; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
@@ -25,8 +25,7 @@ declare i32 @llvm.experimental.gc.result.i32(token) #0
2525

2626
attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
2727
;.
28-
; CHECK: attributes #[[ATTR0:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
30-
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" }
31-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
28+
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
29+
; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
30+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
3231
;.
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
2+
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
3+
4+
; Make sure we infer no inputs are used through some intrinsics
5+
6+
define void @use_fake_use(i32 %arg) {
7+
; CHECK-LABEL: define void @use_fake_use(
8+
; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
10+
; CHECK-NEXT: ret void
11+
;
12+
call void (...) @llvm.fake.use(i32 %arg)
13+
ret void
14+
}
15+
16+
define void @use_donothing() {
17+
; CHECK-LABEL: define void @use_donothing(
18+
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
19+
; CHECK-NEXT: call void @llvm.donothing()
20+
; CHECK-NEXT: ret void
21+
;
22+
call void @llvm.donothing()
23+
ret void
24+
}
25+
26+
define void @use_assume(i1 %arg) {
27+
; CHECK-LABEL: define void @use_assume(
28+
; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR1]] {
29+
; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
30+
; CHECK-NEXT: ret void
31+
;
32+
call void @llvm.assume(i1 %arg)
33+
ret void
34+
}
35+
36+
define void @use_trap() {
37+
; CHECK-LABEL: define void @use_trap(
38+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
39+
; CHECK-NEXT: call void @llvm.trap()
40+
; CHECK-NEXT: ret void
41+
;
42+
call void @llvm.trap()
43+
ret void
44+
}
45+
46+
define void @use_debugtrap() {
47+
; CHECK-LABEL: define void @use_debugtrap(
48+
; CHECK-SAME: ) #[[ATTR2]] {
49+
; CHECK-NEXT: call void @llvm.debugtrap()
50+
; CHECK-NEXT: ret void
51+
;
52+
call void @llvm.debugtrap()
53+
ret void
54+
}
55+
56+
define void @use_ubsantrap() {
57+
; CHECK-LABEL: define void @use_ubsantrap(
58+
; CHECK-SAME: ) #[[ATTR2]] {
59+
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
60+
; CHECK-NEXT: ret void
61+
;
62+
call void @llvm.ubsantrap(i8 0)
63+
ret void
64+
}
65+
66+
;.
67+
; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
68+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
69+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
70+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
71+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
72+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
73+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
74+
; CHECK: attributes #[[ATTR7:[0-9]+]] = { cold noreturn nounwind "target-cpu"="gfx90a" }
75+
;.

0 commit comments

Comments
 (0)