-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[DirectX] Lower @llvm.dx.typedBufferStore to DXIL ops
#104253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e1b20f2
90c7fda
4613ca7
c45511e
ae6e0b8
b1c9a23
0b21d55
d11cb1d
d49a863
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -82,8 +82,11 @@ class OpLowerer { | |
public: | ||
OpLowerer(Module &M, DXILResourceMap &DRM) : M(M), OpBuilder(M), DRM(DRM) {} | ||
|
||
void replaceFunction(Function &F, | ||
llvm::function_ref<Error(CallInst *CI)> ReplaceCall) { | ||
/// Replace every call to \c F using \c ReplaceCall, and then erase \c F. If | ||
/// there is an error replacing a call, we emit a diagnostic and return true. | ||
[[nodiscard]] bool | ||
replaceFunction(Function &F, | ||
llvm::function_ref<Error(CallInst *CI)> ReplaceCall) { | ||
for (User *U : make_early_inc_range(F.users())) { | ||
CallInst *CI = dyn_cast<CallInst>(U); | ||
if (!CI) | ||
|
@@ -94,16 +97,18 @@ class OpLowerer { | |
DiagnosticInfoUnsupported Diag(*CI->getFunction(), Message, | ||
CI->getDebugLoc()); | ||
M.getContext().diagnose(Diag); | ||
continue; | ||
return true; | ||
} | ||
} | ||
if (F.user_empty()) | ||
F.eraseFromParent(); | ||
return false; | ||
} | ||
|
||
void replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) { | ||
[[nodiscard]] | ||
bool replaceFunctionWithOp(Function &F, dxil::OpCode DXILOp) { | ||
bool IsVectorArgExpansion = isVectorArgExpansion(F); | ||
replaceFunction(F, [&](CallInst *CI) -> Error { | ||
return replaceFunction(F, [&](CallInst *CI) -> Error { | ||
SmallVector<Value *> Args; | ||
OpBuilder.getIRB().SetInsertPoint(CI); | ||
if (IsVectorArgExpansion) { | ||
|
@@ -175,12 +180,12 @@ class OpLowerer { | |
CleanupCasts.clear(); | ||
} | ||
|
||
void lowerToCreateHandle(Function &F) { | ||
[[nodiscard]] bool lowerToCreateHandle(Function &F) { | ||
IRBuilder<> &IRB = OpBuilder.getIRB(); | ||
Type *Int8Ty = IRB.getInt8Ty(); | ||
Type *Int32Ty = IRB.getInt32Ty(); | ||
|
||
replaceFunction(F, [&](CallInst *CI) -> Error { | ||
return replaceFunction(F, [&](CallInst *CI) -> Error { | ||
IRB.SetInsertPoint(CI); | ||
|
||
auto *It = DRM.find(CI); | ||
|
@@ -205,10 +210,10 @@ class OpLowerer { | |
}); | ||
} | ||
|
||
void lowerToBindAndAnnotateHandle(Function &F) { | ||
[[nodiscard]] bool lowerToBindAndAnnotateHandle(Function &F) { | ||
IRBuilder<> &IRB = OpBuilder.getIRB(); | ||
|
||
replaceFunction(F, [&](CallInst *CI) -> Error { | ||
return replaceFunction(F, [&](CallInst *CI) -> Error { | ||
IRB.SetInsertPoint(CI); | ||
|
||
auto *It = DRM.find(CI); | ||
|
@@ -251,12 +256,11 @@ class OpLowerer { | |
|
||
/// Lower `dx.handle.fromBinding` intrinsics depending on the shader model and
/// taking into account binding information from DXILResourceAnalysis.
///
/// Modules targeting a DXIL version older than 1.6 are lowered through
/// lowerToCreateHandle; everything else goes through
/// lowerToBindAndAnnotateHandle.
///
/// \returns the result of the chosen lowering helper, i.e. true if an error
/// was reported while replacing a call and false otherwise. The result is
/// marked [[nodiscard]], like the sibling lowering helpers, so that callers
/// cannot silently drop the error flag.
[[nodiscard]] bool lowerHandleFromBinding(Function &F) {
  Triple TT(Triple(M.getTargetTriple()));
  if (TT.getDXILVersion() < VersionTuple(1, 6))
    return lowerToCreateHandle(F);
  return lowerToBindAndAnnotateHandle(F);
}
|
||
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op. | ||
|
@@ -342,11 +346,11 @@ class OpLowerer { | |
return Error::success(); | ||
} | ||
|
||
void lowerTypedBufferLoad(Function &F) { | ||
[[nodiscard]] bool lowerTypedBufferLoad(Function &F) { | ||
IRBuilder<> &IRB = OpBuilder.getIRB(); | ||
Type *Int32Ty = IRB.getInt32Ty(); | ||
|
||
replaceFunction(F, [&](CallInst *CI) -> Error { | ||
return replaceFunction(F, [&](CallInst *CI) -> Error { | ||
IRB.SetInsertPoint(CI); | ||
|
||
Value *Handle = | ||
|
@@ -368,8 +372,51 @@ class OpLowerer { | |
}); | ||
} | ||
|
||
/// Lower every call to the `dx.typedBufferStore` intrinsic \c F into a DXIL
/// `BufferStore` op.
///
/// Each call is read as (handle, index, data). The data operand must be a
/// fixed vector of exactly four elements; it is split into four scalars that
/// are passed to the op together with a write mask covering all of them.
///
/// \returns the result of replaceFunction: true if a call could not be
/// lowered (a diagnostic has been emitted), false otherwise.
[[nodiscard]] bool lowerTypedBufferStore(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *Int8Ty = IRB.getInt8Ty();
  Type *Int32Ty = IRB.getInt32Ty();

  return replaceFunction(F, [&](CallInst *CI) -> Error {
    // Validate the data operand before creating anything, so that a malformed
    // call is rejected without leaving a temporary handle cast behind.
    Value *Data = CI->getArgOperand(2);
    auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
    if (!DataTy || DataTy->getNumElements() != 4)
      return make_error<StringError>(
          "typedBufferStore data must be a vector of 4 elements",
          inconvertibleErrorCode());

    IRB.SetInsertPoint(CI);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    // The intrinsic supplies a single index; the op's second coordinate is
    // passed as undef.
    Value *Index1 = UndefValue::get(Int32Ty);
    // For typed stores, the mask must always cover all four elements.
    Constant *Mask = ConstantInt::get(Int8Ty, 0xF);

    // Operand layout: handle, two coordinates, four data scalars, mask.
    std::array<Value *, 8> Args{Handle,  Index0,  Index1,  nullptr,
                                nullptr, nullptr, nullptr, Mask};
    for (unsigned I = 0; I != 4; ++I)
      Args[3 + I] =
          IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(OpCode::BufferStore, Args);
    if (Error E = OpCall.takeError())
      return E;

    // The store produces no value, so the original call can simply be dropped.
    CI->eraseFromParent();
    return Error::success();
  });
}
|
||
bool lowerIntrinsics() { | ||
bool Updated = false; | ||
bool HasErrors = false; | ||
|
||
for (Function &F : make_early_inc_range(M.functions())) { | ||
if (!F.isDeclaration()) | ||
|
@@ -380,19 +427,22 @@ class OpLowerer { | |
continue; | ||
#define DXIL_OP_INTRINSIC(OpCode, Intrin) \ | ||
case Intrin: \ | ||
replaceFunctionWithOp(F, OpCode); \ | ||
HasErrors |= replaceFunctionWithOp(F, OpCode); \ | ||
break; | ||
#include "DXILOperation.inc" | ||
case Intrinsic::dx_handle_fromBinding: | ||
lowerHandleFromBinding(F); | ||
HasErrors |= lowerHandleFromBinding(F); | ||
break; | ||
case Intrinsic::dx_typedBufferLoad: | ||
lowerTypedBufferLoad(F); | ||
HasErrors |= lowerTypedBufferLoad(F); | ||
break; | ||
case Intrinsic::dx_typedBufferStore: | ||
HasErrors |= lowerTypedBufferStore(F); | ||
break; | ||
} | ||
Updated = true; | ||
} | ||
if (Updated) | ||
if (Updated && !HasErrors) | ||
Review comment: So dx_typedBufferLoad cannot fail? Reply: It can fail in the same ways that any of the op lowerings can fail (for example, if we give it completely incorrect types), but I think it happens to always fail in such a way that the cleanup wouldn't crash. The HasErrors check is added here so that we can carry on and finish the pass and just let LLVM's error handling propagate the error afterwards, because when [text lost in extraction; presumably: a lowering fails, running the handle-cast cleanup may crash]. All that said, we can and should add some tests for cases where lowering loads fails. I'll do that in a follow-up change. |
||
cleanupHandleCasts(); | ||
|
||
return Updated; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
; We use llc for this test so that we don't abort after the first error. | ||
; RUN: not llc %s -o /dev/null 2>&1 | FileCheck %s | ||
|
||
target triple = "dxil-pc-shadermodel6.6-compute" | ||
|
||
; CHECK: error: | ||
; CHECK-SAME: in function storetoomany | ||
; CHECK-SAME: typedBufferStore data must be a vector of 4 elements | ||
define void @storetoomany(<5 x float> %data, i32 %index) { | ||
bogner marked this conversation as resolved.
Show resolved
Hide resolved
|
||
%buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v5f32( | ||
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, | ||
i32 %index, <5 x float> %data) | ||
|
||
ret void | ||
} | ||
|
||
; CHECK: error: | ||
; CHECK-SAME: in function storetoofew | ||
; CHECK-SAME: typedBufferStore data must be a vector of 4 elements | ||
define void @storetoofew(<3 x i32> %data, i32 %index) { | ||
%buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
call void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4i32_1_0_0t.v3i32( | ||
target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer, | ||
i32 %index, <3 x i32> %data) | ||
|
||
ret void | ||
} | ||
|
||
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4f32_1_0_0t.v5f32(target("dx.TypedBuffer", <4 x float>, 1, 0, 0), i32, <5 x float>) | ||
declare void @llvm.dx.typedBufferStore.tdx.TypedBuffer_v4i32_1_0_0t.v3i32(target("dx.TypedBuffer", <4 x i32>, 1, 0, 0), i32, <3 x i32>) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
; RUN: opt -S -dxil-op-lower %s | FileCheck %s | ||
|
||
target triple = "dxil-pc-shadermodel6.6-compute" | ||
|
||
define void @storefloat(<4 x float> %data, i32 %index) { | ||
|
||
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding | ||
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] | ||
%buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
; The temporary casts should all have been cleaned up | ||
; CHECK-NOT: %dx.cast_handle | ||
|
||
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data, i32 0 | ||
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1 | ||
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2 | ||
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3 | ||
; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15) | ||
call void @llvm.dx.typedBufferStore( | ||
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer, | ||
i32 %index, <4 x float> %data) | ||
|
||
ret void | ||
} | ||
|
||
define void @storeint(<4 x i32> %data, i32 %index) { | ||
|
||
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding | ||
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] | ||
%buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i32> %data, i32 0 | ||
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1 | ||
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2 | ||
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3 | ||
; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15) | ||
call void @llvm.dx.typedBufferStore( | ||
target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer, | ||
i32 %index, <4 x i32> %data) | ||
|
||
ret void | ||
} | ||
|
||
define void @storehalf(<4 x half> %data, i32 %index) { | ||
|
||
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding | ||
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] | ||
%buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
; The temporary casts should all have been cleaned up | ||
; CHECK-NOT: %dx.cast_handle | ||
|
||
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x half> %data, i32 0 | ||
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1 | ||
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2 | ||
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3 | ||
; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15) | ||
call void @llvm.dx.typedBufferStore( | ||
target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer, | ||
i32 %index, <4 x half> %data) | ||
|
||
ret void | ||
} | ||
|
||
define void @storei16(<4 x i16> %data, i32 %index) { | ||
|
||
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding | ||
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]] | ||
%buffer = call target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) | ||
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_1_0_0( | ||
i32 0, i32 0, i32 1, i32 0, i1 false) | ||
|
||
; The temporary casts should all have been cleaned up | ||
; CHECK-NOT: %dx.cast_handle | ||
|
||
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i16> %data, i32 0 | ||
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1 | ||
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2 | ||
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3 | ||
; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15) | ||
call void @llvm.dx.typedBufferStore( | ||
target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer, | ||
i32 %index, <4 x i16> %data) | ||
|
||
ret void | ||
} |
Uh oh!
There was an error while loading. Please reload this page.