-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #111884
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-backend-directx Author: Adam Yang (adam-yang) Changespartially fixes #70103 Changes
Related PRs
Full diff: https://github.com/llvm/llvm-project/pull/111884.diff 5 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index f2b9e286ebb476..275a61049980e4 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -87,4 +87,6 @@ def int_dx_wave_is_first_lane : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrCon
def int_dx_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>;
def int_dx_step : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>], [IntrNoMem]>;
def int_dx_radians : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_dx_group_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], []>;
}
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 9aa0af3e3a6b17..756621840a8453 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -277,6 +277,7 @@ def IsFeedback : DXILAttribute;
def IsWave : DXILAttribute;
def NeedsUniformInputs : DXILAttribute;
def IsBarrier : DXILAttribute;
+def NoDuplicate : DXILAttribute;
class Overloads<Version ver, list<DXILOpParamType> ols> {
Version dxil_version = ver;
@@ -801,3 +802,12 @@ def WaveIsFirstLane : DXILOp<110, waveIsFirstLane> {
let stages = [Stages<DXIL1_0, [all_stages]>];
let attributes = [Attributes<DXIL1_0, [ReadNone]>];
}
+
+def Barrier : DXILOp<80, barrier> {
+ let Doc = "inserts a memory barrier in the shader";
+ let arguments = [Int32Ty];
+ let result = VoidTy;
+ let stages = [Stages<DXIL1_0, [compute, library]>];
+ let attributes = [Attributes<DXIL1_0, [NoDuplicate]>];
+}
+
diff --git a/llvm/lib/Target/DirectX/DXILConstants.h b/llvm/lib/Target/DirectX/DXILConstants.h
index 022cd57795a063..38984727761bb3 100644
--- a/llvm/lib/Target/DirectX/DXILConstants.h
+++ b/llvm/lib/Target/DirectX/DXILConstants.h
@@ -30,6 +30,13 @@ enum class OpParamType : unsigned {
#include "DXILOperation.inc"
};
+enum class BarrierMode : unsigned {
+ SyncThreadGroup = 0x00000001,
+ UAVFenceGlobal = 0x00000002,
+ UAVFenceThreadGroup = 0x00000004,
+ TGSMFence = 0x00000008,
+};
+
} // namespace dxil
} // namespace llvm
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index c0f8d433833ee7..f3ff372af8d201 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -210,6 +210,29 @@ class OpLowerer {
});
}
+ [[nodiscard]] bool lowerBarrier(Function &F, Intrinsic::ID IntrId,
+ ArrayRef<dxil::BarrierMode> BarrierModes) {
+ unsigned BarrierMode = 0;
+ for (const dxil::BarrierMode B : BarrierModes) {
+ BarrierMode |= (unsigned)B;
+ }
+ IRBuilder<> &IRB = OpBuilder.getIRB();
+ return replaceFunction(F, [&](CallInst *CI) -> Error {
+ std::array<Value *, 1> Args{IRB.getInt32(BarrierMode)};
+
+ IRB.SetInsertPoint(CI);
+ Expected<CallInst *> OpCall =
+ OpBuilder.tryCreateOp(OpCode::Barrier, Args, CI->getName());
+ if (Error E = OpCall.takeError())
+ return E;
+
+ CI->replaceAllUsesWith(OpCall.get());
+ CI->eraseFromParent();
+
+ return Error::success();
+ });
+ }
+
[[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
@@ -477,6 +500,11 @@ class OpLowerer {
HasErrors |= replaceFunctionWithOp(F, OpCode); \
break;
#include "DXILOperation.inc"
+ case Intrinsic::dx_group_memory_barrier_with_group_sync:
+ HasErrors |= lowerBarrier(
+ F, ID,
+ {dxil::BarrierMode::TGSMFence, dxil::BarrierMode::SyncThreadGroup});
+ break;
case Intrinsic::dx_handle_fromBinding:
HasErrors |= lowerHandleFromBinding(F);
break;
diff --git a/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
new file mode 100644
index 00000000000000..48907647c660f8
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -dxil-op-lower -mtriple=dxil-pc-shadermodel6.3-library < %s | FileCheck %s --check-prefix=CHECK
+
+define void @test_group_memory_barrier_with_group_sync() {
+entry:
+ ; CHECK: call void @dx.op.barrier(i32 80, i32 9)
+ call void @llvm.dx.group.memory.barrier.with.group.sync()
+ ret void
+}
\ No newline at end of file
|
12723f3
to
7ee8d54
Compare
@@ -477,6 +500,11 @@ class OpLowerer { | |||
HasErrors |= replaceFunctionWithOp(F, OpCode); \ | |||
break; | |||
#include "DXILOperation.inc" | |||
case Intrinsic::dx_group_memory_barrier_with_group_sync: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we could also approach this as an intrinsic expansion. So we would create a DirectX intrinsic dx_group_memory_barrier
that would lower here. Then we would deal with dx_group_memory_barrier_with_group_sync
and all the other barrier ops as an intrinsic expansion with the applicable opcode and barrier modes.
I think that would be clearer when adding future barrier ops and match the flow of other intrinsics. WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah make sense. I wasn't aware of the precedence of expanding one intrinsic to another, so it maps directly to a dx op.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really like this pattern of defining extra llvm intrinsics just for a lowering phase.
As far as I can tell there are a few ways to design the intrinsics for barriers:
- Have one llvm intrinsic with arguments for the barrier mode. This would either need to match DXIL's mask (which means we need to expose the
BarrierMode
enum inDXILABI.h
) or we'd still need custom lowering to translate it - Have the LLVM intrinsics map to the HLSL operations and figure out the mask for the DXIL operation later.
- 3 Have both of these representations and thus potentially have to handle them both throughout the compiler
I prefer (2), as (1) makes lowering to SPIR-V a bit messier in that it needs to be aware of random DXIL enums. (3) really just means we have to deal with both, which seems worse to me.
So if that's the case then the question is can we avoid needing custom lowering in DXILOpLowering to map the intrinsic to the barrier masks? Can we enhance DXIL.td to embed this information so the generic lowering can take care of this? Or can make replaceFunctionWithOp more modular and have a way to pass it extra arguments to pass on?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here is an implementation where we can do (2) with just tablegen:
This reverts commit c7d83cf.
…y to the barrier dxil op
0f65208
to
c5c5d75
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Just some opinionated nits.
llvm/test/CodeGen/DirectX/group_memory_barrier_with_group_sync.ll
Outdated
Show resolved
Hide resolved
int value = value_; | ||
} | ||
|
||
defset list<DXILConstant> BarrierModes = { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is DXILConstant
added out of necessity? Or can you do defset list<int> BarrierModes ...
?
Was it added for better documentation/extensibility?
Otherwise, we could remove index
from the Arg
class and replace the the is...
bits with a type enum to match the ArgSelect
struct directly and I think it would allow for more concise code in DXILEmitter.cpp
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is. I don't think there's a way for definitions to be just a primitive type like int; it has to be instantiation of a class, which in turn has int members. I could be wrong though.
partially fixes #70103 ### Changes * Added int_spv_group_memory_barrier_with_group_sync intrinsic in IntrinsicsSPIRV.td * Added lowering for int_spv_group_memory_barrier_with_group_sync in SPIRVInstructionSelector.cpp * Added SPIRV backend test case ### Related PRs * [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic #111883](#111883) * [[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #111884](#111884)
This reverts commit 9a5b3a1.
fixes llvm#112974 partially fixes llvm#70103 ### Changes - Added new tablegen based way of lowering dx intrinsics to DXIL ops. - Added int_dx_group_memory_barrier_with_group_sync intrinsic in IntrinsicsDirectX.td - Added expansion for int_dx_group_memory_barrier_with_group_sync in DXILIntrinsicExpansion.cpp` - Added DXIL backend test case ### Related PRs * [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic llvm#111883](llvm#111883) * [[SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic llvm#111888](llvm#111888)
partially fixes llvm#70103 ### Changes * Added int_spv_group_memory_barrier_with_group_sync intrinsic in IntrinsicsSPIRV.td * Added lowering for int_spv_group_memory_barrier_with_group_sync in SPIRVInstructionSelector.cpp * Added SPIRV backend test case ### Related PRs * [[clang][HLSL] Add GroupMemoryBarrierWithGroupSync intrinsic llvm#111883](llvm#111883) * [[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic llvm#111884](llvm#111884)
partially fixes #70103 ### Changes * Implemented `GroupMemoryBarrierWithGroupSync` clang builtin * Linked `GroupMemoryBarrierWithGroupSync` clang builtin with `hlsl_intrinsics.h` * Added sema checks for `GroupMemoryBarrierWithGroupSync` to `CheckHLSLBuiltinFunctionCall` in `SemaChecking.cpp` * Add codegen for `GroupMemoryBarrierWithGroupSync` to `EmitHLSLBuiltinExpr` in `CGBuiltin.cpp` * Add codegen tests to `clang/test/CodeGenHLSL/builtins/GroupMemoryBarrierWithGroupSync.hlsl` * Add sema tests to `clang/test/SemaHLSL/BuiltIns/GroupMemoryBarrierWithGroupSync-errors.hlsl` ### Related PRs * [[DXIL] Add GroupMemoryBarrierWithGroupSync intrinsic #111884](#111884) * [[SPIRV] Add GroupMemoryBarrierWithGroupSync intrinsic #111888](#111888)
fixes #112974
partially fixes #70103
Changes
Related PRs