Skip to content

Commit 34e20f1

Browse files
authored
[DirectX] Implement typedBufferLoad_checkbit (#108087)
This represents a typedBufferLoad whose status is then checked with "CheckAccessFullyMapped". In addition to the loaded data, it returns an extra `i1` holding the result of that check. Fixes #108085
1 parent 93e45a6 commit 34e20f1

File tree

5 files changed

+96
-11
lines changed

5 files changed

+96
-11
lines changed

llvm/docs/DirectX/DXILResources.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,12 @@ Examples:
361361
- ``i32``
362362
- Index into the buffer
363363

364+
.. code-block:: llvm
365+
366+
%ret = call {<4 x float>, i1}
367+
@llvm.dx.typedBufferLoad.checkbit.v4f32.tdx.TypedBuffer_v4f32_0_0_0t(
368+
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 %index)
369+
364370
Texture and Typed Buffer Stores
365371
-------------------------------
366372

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ def int_dx_handle_fromBinding
3232

3333
def int_dx_typedBufferLoad
3434
: DefaultAttrsIntrinsic<[llvm_any_ty], [llvm_any_ty, llvm_i32_ty]>;
35+
def int_dx_typedBufferLoad_checkbit
36+
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
37+
[llvm_any_ty, llvm_i32_ty]>;
3538
def int_dx_typedBufferStore
3639
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
3740

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,15 @@ def BufferStore : DXILOp<69, bufferStore> {
719719
let stages = [Stages<DXIL1_0, [all_stages]>];
720720
}
721721

722+
def CheckAccessFullyMapped : DXILOp<71, checkAccessFullyMapped> {
723+
let Doc = "checks whether a Sample, Gather, or Load operation "
724+
"accessed mapped tiles in a tiled resource";
725+
let arguments = [OverloadTy];
726+
let result = Int1Ty;
727+
let overloads = [Overloads<DXIL1_0, [Int32Ty]>];
728+
let stages = [Stages<DXIL1_0, [all_stages]>];
729+
}
730+
722731
def ThreadId : DXILOp<93, threadId> {
723732
let Doc = "Reads the thread ID";
724733
let LLVMIntrinsic = int_dx_thread_id;

llvm/lib/Target/DirectX/DXILOpLowering.cpp

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -265,16 +265,50 @@ class OpLowerer {
265265

266266
/// Replace uses of \c Intrin with the values in the `dx.ResRet` of \c Op.
267267
/// Since we expect to be post-scalarization, make an effort to avoid vectors.
268-
Error replaceResRetUses(CallInst *Intrin, CallInst *Op) {
268+
Error replaceResRetUses(CallInst *Intrin, CallInst *Op, bool HasCheckBit) {
269269
IRBuilder<> &IRB = OpBuilder.getIRB();
270270

271+
Instruction *OldResult = Intrin;
271272
Type *OldTy = Intrin->getType();
272273

274+
if (HasCheckBit) {
275+
auto *ST = cast<StructType>(OldTy);
276+
277+
Value *CheckOp = nullptr;
278+
Type *Int32Ty = IRB.getInt32Ty();
279+
for (Use &U : make_early_inc_range(OldResult->uses())) {
280+
if (auto *EVI = dyn_cast<ExtractValueInst>(U.getUser())) {
281+
ArrayRef<unsigned> Indices = EVI->getIndices();
282+
assert(Indices.size() == 1);
283+
// We're only interested in uses of the check bit for now.
284+
if (Indices[0] != 1)
285+
continue;
286+
if (!CheckOp) {
287+
Value *NewEVI = IRB.CreateExtractValue(Op, 4);
288+
Expected<CallInst *> OpCall = OpBuilder.tryCreateOp(
289+
OpCode::CheckAccessFullyMapped, {NewEVI}, Int32Ty);
290+
if (Error E = OpCall.takeError())
291+
return E;
292+
CheckOp = *OpCall;
293+
}
294+
EVI->replaceAllUsesWith(CheckOp);
295+
EVI->eraseFromParent();
296+
}
297+
}
298+
299+
OldResult = cast<Instruction>(IRB.CreateExtractValue(Op, 0));
300+
OldTy = ST->getElementType(0);
301+
}
302+
273303
// For scalars, we just extract the first element.
274304
if (!isa<FixedVectorType>(OldTy)) {
275305
Value *EVI = IRB.CreateExtractValue(Op, 0);
276-
Intrin->replaceAllUsesWith(EVI);
277-
Intrin->eraseFromParent();
306+
OldResult->replaceAllUsesWith(EVI);
307+
OldResult->eraseFromParent();
308+
if (OldResult != Intrin) {
309+
assert(Intrin->use_empty() && "Intrinsic still has uses?");
310+
Intrin->eraseFromParent();
311+
}
278312
return Error::success();
279313
}
280314

@@ -283,7 +317,7 @@ class OpLowerer {
283317

284318
// The users of the operation should all be scalarized, so we attempt to
285319
// replace the extractelements with extractvalues directly.
286-
for (Use &U : make_early_inc_range(Intrin->uses())) {
320+
for (Use &U : make_early_inc_range(OldResult->uses())) {
287321
if (auto *EEI = dyn_cast<ExtractElementInst>(U.getUser())) {
288322
if (auto *IndexOp = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
289323
size_t IndexVal = IndexOp->getZExtValue();
@@ -331,22 +365,27 @@ class OpLowerer {
331365
// If we still have uses, then we're not fully scalarized and need to
332366
// recreate the vector. This should only happen for things like exported
333367
// functions from libraries.
334-
if (!Intrin->use_empty()) {
368+
if (!OldResult->use_empty()) {
335369
for (int I = 0, E = N; I != E; ++I)
336370
if (!Extracts[I])
337371
Extracts[I] = IRB.CreateExtractValue(Op, I);
338372

339373
Value *Vec = UndefValue::get(OldTy);
340374
for (int I = 0, E = N; I != E; ++I)
341375
Vec = IRB.CreateInsertElement(Vec, Extracts[I], I);
342-
Intrin->replaceAllUsesWith(Vec);
376+
OldResult->replaceAllUsesWith(Vec);
377+
}
378+
379+
OldResult->eraseFromParent();
380+
if (OldResult != Intrin) {
381+
assert(Intrin->use_empty() && "Intrinsic still has uses?");
382+
Intrin->eraseFromParent();
343383
}
344384

345-
Intrin->eraseFromParent();
346385
return Error::success();
347386
}
348387

349-
[[nodiscard]] bool lowerTypedBufferLoad(Function &F) {
388+
[[nodiscard]] bool lowerTypedBufferLoad(Function &F, bool HasCheckBit) {
350389
IRBuilder<> &IRB = OpBuilder.getIRB();
351390
Type *Int32Ty = IRB.getInt32Ty();
352391

@@ -358,14 +397,17 @@ class OpLowerer {
358397
Value *Index0 = CI->getArgOperand(1);
359398
Value *Index1 = UndefValue::get(Int32Ty);
360399

361-
Type *NewRetTy = OpBuilder.getResRetType(CI->getType()->getScalarType());
400+
Type *OldTy = CI->getType();
401+
if (HasCheckBit)
402+
OldTy = cast<StructType>(OldTy)->getElementType(0);
403+
Type *NewRetTy = OpBuilder.getResRetType(OldTy->getScalarType());
362404

363405
std::array<Value *, 3> Args{Handle, Index0, Index1};
364406
Expected<CallInst *> OpCall =
365407
OpBuilder.tryCreateOp(OpCode::BufferLoad, Args, NewRetTy);
366408
if (Error E = OpCall.takeError())
367409
return E;
368-
if (Error E = replaceResRetUses(CI, *OpCall))
410+
if (Error E = replaceResRetUses(CI, *OpCall, HasCheckBit))
369411
return E;
370412

371413
return Error::success();
@@ -434,7 +476,10 @@ class OpLowerer {
434476
HasErrors |= lowerHandleFromBinding(F);
435477
break;
436478
case Intrinsic::dx_typedBufferLoad:
437-
HasErrors |= lowerTypedBufferLoad(F);
479+
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/false);
480+
break;
481+
case Intrinsic::dx_typedBufferLoad_checkbit:
482+
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
438483
break;
439484
case Intrinsic::dx_typedBufferStore:
440485
HasErrors |= lowerTypedBufferStore(F);

llvm/test/CodeGen/DirectX/BufferLoad.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ target triple = "dxil-pc-shadermodel6.6-compute"
44

55
declare void @scalar_user(float)
66
declare void @vector_user(<4 x float>)
7+
declare void @check_user(i1)
78

89
define void @loadv4f32() {
910
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
@@ -128,6 +129,27 @@ define void @loadv2f32() {
128129
ret void
129130
}
130131

132+
define void @loadv4f32_checkbit() {
133+
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
134+
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
135+
%buffer = call target("dx.TypedBuffer", <4 x float>, 0, 0, 0)
136+
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_0_0_0(
137+
i32 0, i32 0, i32 1, i32 0, i1 false)
138+
139+
; CHECK: [[DATA0:%.*]] = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 68, %dx.types.Handle [[HANDLE]], i32 0, i32 undef)
140+
%data0 = call {<4 x float>, i1} @llvm.dx.typedBufferLoad.checkbit.f32(
141+
target("dx.TypedBuffer", <4 x float>, 0, 0, 0) %buffer, i32 0)
142+
143+
; CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.f32 [[DATA0]], 4
144+
; CHECK: [[MAPPED:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]
145+
%check = extractvalue {<4 x float>, i1} %data0, 1
146+
147+
; CHECK: call void @check_user(i1 [[MAPPED]])
148+
call void @check_user(i1 %check)
149+
150+
ret void
151+
}
152+
131153
define void @loadv4i32() {
132154
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
133155
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]

0 commit comments

Comments
 (0)