Skip to content

Commit 151e561

Browse files
committed
[DirectX] Lower @llvm.dx.typedBufferStore to DXIL ops
The `@llvm.dx.typedBufferStore` intrinsic is lowered to `@dx.op.bufferStore`. Pull Request: llvm#104253
1 parent d78ffd2 commit 151e561

File tree

5 files changed

+150
-5
lines changed

5 files changed

+150
-5
lines changed

llvm/docs/DirectX/DXILResources.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -365,11 +365,11 @@ Examples:
365365

366366
.. code-block:: llvm
367367
368-
call void @llvm.dx.bufferStore.tdx.Buffer_f32_1_0t(
368+
call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f32_1_0_0t(
369369
target("dx.TypedBuffer", f32, 1, 0) %buf, i32 %index, <4 x f32> %data)
370-
call void @llvm.dx.bufferStore.tdx.Buffer_f16_1_0t(
370+
call void @llvm.dx.typedBufferStore.tdx.Buffer_v4f16_1_0_0t(
371371
target("dx.TypedBuffer", f16, 1, 0) %buf, i32 %index, <4 x f16> %data)
372-
call void @llvm.dx.bufferStore.tdx.Buffer_f64_1_0t(
372+
call void @llvm.dx.typedBufferStore.tdx.Buffer_v2f64_1_0_0t(
373373
target("dx.TypedBuffer", f64, 1, 0) %buf, i32 %index, <2 x f64> %data)
374374
375375
.. list-table:: ``@llvm.dx.rawBufferPtr``

llvm/include/llvm/IR/IntrinsicsDirectX.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ def int_dx_handle_fromBinding
3131
[IntrNoMem]>;
3232

3333
def int_dx_typedBufferLoad
34-
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
35-
[llvm_any_ty, llvm_i32_ty]>;
34+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_any_ty, llvm_i32_ty]>;
35+
def int_dx_typedBufferStore
36+
: DefaultAttrsIntrinsic<[], [llvm_any_ty, llvm_i32_ty, llvm_anyvector_ty]>;
3637

3738
// Cast between target extension handle types and dxil-style opaque handles
3839
def int_dx_cast_handle : Intrinsic<[llvm_any_ty], [llvm_any_ty]>;

llvm/lib/Target/DirectX/DXIL.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,18 @@ def BufferLoad : DXILOp<68, bufferLoad> {
697697
let stages = [Stages<DXIL1_0, [all_stages]>];
698698
}
699699

700+
def BufferStore : DXILOp<69, bufferStore> {
701+
let Doc = "writes to an RWTypedBuffer";
702+
// Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask
703+
let arguments = [
704+
HandleTy, Int32Ty, Int32Ty, OverloadTy, OverloadTy, OverloadTy, OverloadTy,
705+
Int8Ty
706+
];
707+
let result = VoidTy;
708+
let overloads = [Overloads<DXIL1_0, [HalfTy, FloatTy, Int16Ty, Int32Ty]>];
709+
let stages = [Stages<DXIL1_0, [all_stages]>];
710+
}
711+
700712
def ThreadId : DXILOp<93, threadId> {
701713
let Doc = "Reads the thread ID";
702714
let LLVMIntrinsic = int_dx_thread_id;

llvm/lib/Target/DirectX/DXILOpLowering.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,43 @@ class OpLowerer {
289289
});
290290
}
291291

292+
/// Lower every call to the `@llvm.dx.typedBufferStore` intrinsic \p F into a
/// `dx.op.bufferStore` DXIL operation.
///
/// The DXIL op takes the four stored components as separate scalar operands,
/// so the intrinsic's vector data operand is split with `extractelement`.
/// The intrinsic accepts any vector type, but only four-element vectors can
/// be lowered this way; anything else is rejected with an error instead of
/// silently extracting out-of-range (poison) lanes.
///
/// \param F the declaration of the typedBufferStore intrinsic whose calls are
///          to be replaced.
void lowerTypedBufferStore(Function &F) {
  IRBuilder<> &IRB = OpBuilder.getIRB();
  Type *Int8Ty = IRB.getInt8Ty();
  Type *Int32Ty = IRB.getInt32Ty();

  replaceFunction(F, [&](CallInst *CI) -> Error {
    // Validate before emitting anything so that the failure path leaves no
    // dead instructions (handle casts, extracts) behind.
    Value *Data = CI->getArgOperand(2);
    constexpr unsigned NumComponents = 4;
    auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
    if (!DataTy || DataTy->getNumElements() != NumComponents)
      return make_error<StringError>(
          "typedBufferStore data must be a vector of 4 elements",
          inconvertibleErrorCode());

    IRB.SetInsertPoint(CI);

    Value *Handle =
        createTmpHandleCast(CI->getArgOperand(0), OpBuilder.getHandleType());
    Value *Index0 = CI->getArgOperand(1);
    // The second coordinate is passed as undef; only one index is supplied
    // by the intrinsic.
    Value *Index1 = UndefValue::get(Int32Ty);
    // For typed stores, the mask must always cover all four elements.
    Constant *Mask = ConstantInt::get(Int8Ty, 0xF);

    // Operand layout: Handle, Coord0, Coord1, Val0, Val1, Val2, Val3, Mask.
    std::array<Value *, 8> Args{Handle,  Index0,  Index1,  nullptr,
                                nullptr, nullptr, nullptr, Mask};
    // Keep the i32 lane indices (rather than the uint64_t convenience
    // overload) so the emitted extractelement instructions are unchanged.
    for (unsigned I = 0; I != NumComponents; ++I)
      Args[3 + I] =
          IRB.CreateExtractElement(Data, ConstantInt::get(Int32Ty, I));

    Expected<CallInst *> OpCall =
        OpBuilder.tryCreateOp(OpCode::BufferStore, Args);
    if (Error E = OpCall.takeError())
      return E;

    // The intrinsic returns void, so there are no uses to rewrite.
    CI->eraseFromParent();
    return Error::success();
  });
}
328+
292329
bool lowerIntrinsics() {
293330
bool Updated = false;
294331

@@ -310,6 +347,9 @@ class OpLowerer {
310347
case Intrinsic::dx_typedBufferLoad:
311348
lowerTypedBufferLoad(F);
312349
break;
350+
case Intrinsic::dx_typedBufferStore:
351+
lowerTypedBufferStore(F);
352+
break;
313353
}
314354
Updated = true;
315355
}
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
; RUN: opt -S -dxil-op-lower %s | FileCheck %s
2+
3+
target triple = "dxil-pc-shadermodel6.6-compute"
4+
5+
define void @storefloat(<4 x float> %data, i32 %index) {
6+
7+
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
8+
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
9+
%buffer = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0)
10+
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f32_1_0_0(
11+
i32 0, i32 0, i32 1, i32 0, i1 false)
12+
13+
; The temporary casts should all have been cleaned up
14+
; CHECK-NOT: %dx.cast_handle
15+
16+
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x float> %data, i32 0
17+
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x float> %data, i32 1
18+
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x float> %data, i32 2
19+
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x float> %data, i32 3
20+
; CHECK: call void @dx.op.bufferStore.f32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, float [[DATA0_0]], float [[DATA0_1]], float [[DATA0_2]], float [[DATA0_3]], i8 15)
21+
call void @llvm.dx.typedBufferStore(
22+
target("dx.TypedBuffer", <4 x float>, 1, 0, 0) %buffer,
23+
i32 %index, <4 x float> %data)
24+
25+
ret void
26+
}
27+
28+
; Storing a <4 x i32> through a typed buffer handle lowers to
; dx.op.bufferStore.i32 with the vector split into four scalar operands.
define void @storeint(<4 x i32> %data, i32 %index) {

  ; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
  ; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
  %buffer = call target("dx.TypedBuffer", <4 x i32>, 1, 0, 0)
      @llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i32_1_0_0(
          i32 0, i32 0, i32 1, i32 0, i1 false)

  ; The temporary casts should all have been cleaned up
  ; CHECK-NOT: %dx.cast_handle

  ; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i32> %data, i32 0
  ; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i32> %data, i32 1
  ; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i32> %data, i32 2
  ; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i32> %data, i32 3
  ; CHECK: call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i32 [[DATA0_0]], i32 [[DATA0_1]], i32 [[DATA0_2]], i32 [[DATA0_3]], i8 15)
  call void @llvm.dx.typedBufferStore(
      target("dx.TypedBuffer", <4 x i32>, 1, 0, 0) %buffer,
      i32 %index, <4 x i32> %data)

  ret void
}
47+
48+
define void @storehalf(<4 x half> %data, i32 %index) {
49+
50+
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
51+
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
52+
%buffer = call target("dx.TypedBuffer", <4 x half>, 1, 0, 0)
53+
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4f16_1_0_0(
54+
i32 0, i32 0, i32 1, i32 0, i1 false)
55+
56+
; The temporary casts should all have been cleaned up
57+
; CHECK-NOT: %dx.cast_handle
58+
59+
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x half> %data, i32 0
60+
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x half> %data, i32 1
61+
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x half> %data, i32 2
62+
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x half> %data, i32 3
63+
; CHECK: call void @dx.op.bufferStore.f16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, half [[DATA0_0]], half [[DATA0_1]], half [[DATA0_2]], half [[DATA0_3]], i8 15)
64+
call void @llvm.dx.typedBufferStore(
65+
target("dx.TypedBuffer", <4 x half>, 1, 0, 0) %buffer,
66+
i32 %index, <4 x half> %data)
67+
68+
ret void
69+
}
70+
71+
define void @storei16(<4 x i16> %data, i32 %index) {
72+
73+
; CHECK: [[BIND:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding
74+
; CHECK: [[HANDLE:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 217, %dx.types.Handle [[BIND]]
75+
%buffer = call target("dx.TypedBuffer", <4 x i16>, 1, 0, 0)
76+
@llvm.dx.handle.fromBinding.tdx.TypedBuffer_v4i16_1_0_0(
77+
i32 0, i32 0, i32 1, i32 0, i1 false)
78+
79+
; The temporary casts should all have been cleaned up
80+
; CHECK-NOT: %dx.cast_handle
81+
82+
; CHECK: [[DATA0_0:%.*]] = extractelement <4 x i16> %data, i32 0
83+
; CHECK: [[DATA0_1:%.*]] = extractelement <4 x i16> %data, i32 1
84+
; CHECK: [[DATA0_2:%.*]] = extractelement <4 x i16> %data, i32 2
85+
; CHECK: [[DATA0_3:%.*]] = extractelement <4 x i16> %data, i32 3
86+
; CHECK: call void @dx.op.bufferStore.i16(i32 69, %dx.types.Handle [[HANDLE]], i32 %index, i32 undef, i16 [[DATA0_0]], i16 [[DATA0_1]], i16 [[DATA0_2]], i16 [[DATA0_3]], i8 15)
87+
call void @llvm.dx.typedBufferStore(
88+
target("dx.TypedBuffer", <4 x i16>, 1, 0, 0) %buffer,
89+
i32 %index, <4 x i16> %data)
90+
91+
ret void
92+
}

0 commit comments

Comments
 (0)