Skip to content

Commit 9d4094a

Browse files
authored
[AMDGPU] Add llvm.amdgcn.set.inactive.chain.arg intrinsic (#71530)
Add a new intrinsic, similar to llvm.amdgcn.set.inactive, but usable only in functions with the `amdgpu_cs_chain` or `amdgpu_cs_chain_preserve` calling conventions. It sets the inactive lanes to the corresponding lanes of a value received as a VGPR function argument (whereas llvm.amdgcn.set.inactive usually takes a constant as the value for the inactive lanes). Differential Revision: https://reviews.llvm.org/D158604
1 parent e2f1a95 commit 9d4094a

File tree

3 files changed

+95
-0
lines changed

3 files changed

+95
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2135,6 +2135,17 @@ def int_amdgcn_set_inactive :
21352135
LLVMMatchType<0>], // value for the inactive lanes to take
21362136
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
21372137

2138+
// Similar to int_amdgcn_set_inactive, but the value for the inactive lanes must
2139+
// be a VGPR function argument.
2140+
// Can only be used in functions with the `amdgpu_cs_chain` or
2141+
// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
2142+
// flow.
2143+
def int_amdgcn_set_inactive_chain_arg :
2144+
Intrinsic<[llvm_anyint_ty],
2145+
[LLVMMatchType<0>, // value to be copied
2146+
LLVMMatchType<0>], // value for the inactive lanes to take
2147+
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
2148+
21382149
// Return if the given flat pointer points to a local memory address.
21392150
def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
21402151
DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],

llvm/lib/IR/Verifier.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5987,6 +5987,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
59875987
"VGPR arguments must not have the `inreg` attribute", &Call);
59885988
break;
59895989
}
5990+
case Intrinsic::amdgcn_set_inactive_chain_arg: {
5991+
auto CallerCC = Call.getCaller()->getCallingConv();
5992+
switch (CallerCC) {
5993+
case CallingConv::AMDGPU_CS_Chain:
5994+
case CallingConv::AMDGPU_CS_ChainPreserve:
5995+
break;
5996+
default:
5997+
CheckFailed("Intrinsic can only be used from functions with the "
5998+
"amdgpu_cs_chain or amdgpu_cs_chain_preserve "
5999+
"calling conventions",
6000+
&Call);
6001+
break;
6002+
}
6003+
6004+
unsigned InactiveIdx = 1;
6005+
Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
6006+
"Value for inactive lanes must not have the `inreg` attribute",
6007+
&Call);
6008+
Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
6009+
"Value for inactive lanes must be a function argument", &Call);
6010+
Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
6011+
"Value for inactive lanes must be a VGPR function argument", &Call);
6012+
break;
6013+
}
59906014
case Intrinsic::experimental_convergence_entry:
59916015
LLVM_FALLTHROUGH;
59926016
case Intrinsic::experimental_convergence_anchor:

llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
22

33
declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
4+
declare i32 @llvm.amdgcn.set.inactive.chain.arg(i32, i32) convergent willreturn nofree nocallback readnone
45

56
define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
67
; CHECK: immarg operand has non-immediate parameter
@@ -32,29 +33,88 @@ define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr,
3233
}
3334

3435
define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
36+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
37+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
38+
%unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
39+
3540
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
3641
; CHECK-NEXT: @llvm.amdgcn.cs.chain
3742
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
3843
unreachable
3944
}
4045

4146
define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
47+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
48+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
49+
%unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
50+
4251
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
4352
; CHECK-NEXT: @llvm.amdgcn.cs.chain
4453
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
4554
unreachable
4655
}
4756

4857
define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
58+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
59+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
60+
%unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
61+
4962
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
5063
; CHECK-NEXT: @llvm.amdgcn.cs.chain
5164
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
5265
unreachable
5366
}
5467

5568
define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
69+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
70+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
71+
%unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
72+
5673
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
5774
; CHECK-NEXT: @llvm.amdgcn.cs.chain
5875
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
5976
unreachable
6077
}
78+
79+
define amdgpu_cs void @bad_caller_amdgpu_cs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
80+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
81+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
82+
%unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
83+
84+
; Unlike llvm.amdgcn.set.inactive.chain.arg, llvm.amdgcn.cs.chain may be called from amdgpu_cs functions.
85+
86+
ret void
87+
}
88+
89+
define amdgpu_cs_chain void @set_inactive_chain_arg_sgpr(ptr addrspace(1) %out, i32 %active, i32 inreg %inactive) {
90+
; CHECK: Value for inactive lanes must be a VGPR function argument
91+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
92+
%tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
93+
store i32 %tmp, ptr addrspace(1) %out
94+
ret void
95+
}
96+
97+
define amdgpu_cs_chain void @set_inactive_chain_arg_const(ptr addrspace(1) %out, i32 %active) {
98+
; CHECK: Value for inactive lanes must be a function argument
99+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
100+
%tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 29) #0
101+
store i32 %tmp, ptr addrspace(1) %out
102+
ret void
103+
}
104+
105+
define amdgpu_cs_chain void @set_inactive_chain_arg_computed(ptr addrspace(1) %out, i32 %active) {
106+
; CHECK: Value for inactive lanes must be a function argument
107+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
108+
%inactive = add i32 %active, 127
109+
%tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
110+
store i32 %tmp, ptr addrspace(1) %out
111+
ret void
112+
}
113+
114+
define amdgpu_cs_chain void @set_inactive_chain_arg_inreg(ptr addrspace(1) %out, i32 %active, i32 %inactive) {
115+
; CHECK: Value for inactive lanes must not have the `inreg` attribute
116+
; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
117+
%tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 inreg %inactive) #0
118+
store i32 %tmp, ptr addrspace(1) %out
119+
ret void
120+
}

0 commit comments

Comments
 (0)