Skip to content

Commit b96adb5

Browse files
committed
Implement memcpy in DXIL CBuffer Access
1 parent c24411f commit b96adb5

File tree

2 files changed

+287
-0
lines changed

2 files changed

+287
-0
lines changed

llvm/lib/Target/DirectX/DXILCBufferAccess.cpp

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,82 @@ static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
195195
DeadInsts.push_back(LI);
196196
}
197197

198+
/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
199+
/// itself. Assumes the cbuffer global is an array, and the length of bytes to
200+
/// copy is divisible by array element allocation size.
201+
/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
202+
static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR,
203+
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
204+
205+
ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
206+
assert(ArrTy && "MemCpy lowering is only supported for array types");
207+
208+
// This assumption vastly simplifies the implementation
209+
if (MCI->getSource() != CBR.Member)
210+
reportFatalUsageError(
211+
"Expected MemCpy source to be a cbuffer global variable");
212+
213+
const std::string Name = ("memcpy." + MCI->getDest()->getName() + "." +
214+
MCI->getSource()->getName())
215+
.str();
216+
217+
ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
218+
uint64_t ByteLength = Length->getZExtValue();
219+
220+
// If length to copy is zero, no memcpy is needed
221+
if (ByteLength == 0) {
222+
DeadInsts.push_back(MCI);
223+
return;
224+
}
225+
226+
const DataLayout &DL = CBR.getDataLayout();
227+
228+
Type *ElemTy = ArrTy->getElementType();
229+
size_t ElemSize = DL.getTypeAllocSize(ElemTy);
230+
assert(ByteLength % ElemSize == 0 &&
231+
"Length of bytes to MemCpy must be divisible by allocation size of "
232+
"source/destination array elements");
233+
size_t ElemsToCpy = ByteLength / ElemSize;
234+
235+
IRBuilder<> Builder(MCI);
236+
CBR.createAndSetCurrentHandle(Builder);
237+
238+
auto CopyElemsImpl = [&Builder, &MCI, &Name, &CBR,
239+
&DL](const auto &Self, ArrayType *ArrTy,
240+
size_t ArrOffset, size_t N) -> void {
241+
Type *ElemTy = ArrTy->getElementType();
242+
size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
243+
for (unsigned I = 0; I < N; ++I) {
244+
size_t Offset = ArrOffset + I * ElemTySize;
245+
246+
// Recursively copy nested arrays
247+
if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
248+
Self(Self, ElemArrTy, Offset, ElemArrTy->getNumElements());
249+
continue;
250+
}
251+
252+
// Load CBuffer value and store it in Dest
253+
APInt CBufArrayOffset(
254+
DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
255+
CBufArrayOffset =
256+
hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
257+
Value *CBufferVal =
258+
CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
259+
Value *GEP =
260+
Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
261+
{Builder.getInt32(Offset)}, Name + ".dest");
262+
Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
263+
}
264+
};
265+
auto CopyElems = [&CopyElemsImpl](ArrayType *ArrTy, size_t N) -> void {
266+
CopyElemsImpl(CopyElemsImpl, ArrTy, 0, N);
267+
};
268+
269+
CopyElems(ArrTy, ElemsToCpy);
270+
271+
MCI->eraseFromParent();
272+
}
273+
198274
static void replaceAccessesWithHandle(CBufferResource &CBR) {
199275
SmallVector<WeakTrackingVH> DeadInsts;
200276

@@ -208,6 +284,13 @@ static void replaceAccessesWithHandle(CBufferResource &CBR) {
208284
continue;
209285
}
210286

287+
// If we have a memcpy instruction, replace it with multiple accesses and
288+
// subsequent stores to the destination
289+
if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
290+
replaceMemCpy(MCI, CBR, DeadInsts);
291+
continue;
292+
}
293+
211294
// Otherwise, walk users looking for a load...
212295
if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
213296
ToProcess.append(Cur->user_begin(), Cur->user_end());
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
; RUN: opt -S -dxil-cbuffer-access -mtriple=dxil--shadermodel6.3-library %s | FileCheck %s
2+
3+
; cbuffer CB : register(b0) {
4+
; float a1[3];
5+
; double3 a2[2];
6+
; float16_t a3[2][2];
7+
; uint64_t a4[3];
8+
; int2 a5[3][2];
9+
; uint16_t a6[1];
10+
; int64_t a7[2];
11+
; bool a8[4];
12+
; }
13+
%__cblayout_CB = type <{ [3 x float], [2 x <3 x double>], [2 x [2 x half]], [3 x i64], [3 x [2 x <2 x i32>]], [1 x i16], [2 x i64], [4 x i32] }>
14+
15+
@CB.cb = local_unnamed_addr global target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) poison
16+
@a1 = external local_unnamed_addr addrspace(2) global [3 x float], align 4
17+
@a2 = external local_unnamed_addr addrspace(2) global [2 x <3 x double>], align 32
18+
@a3 = external local_unnamed_addr addrspace(2) global [2 x [2 x half]], align 2
19+
@a4 = external local_unnamed_addr addrspace(2) global [3 x i64], align 8
20+
@a5 = external local_unnamed_addr addrspace(2) global [3 x [2 x <2 x i32>]], align 16
21+
@a6 = external local_unnamed_addr addrspace(2) global [1 x i16], align 2
22+
@a7 = external local_unnamed_addr addrspace(2) global [2 x i64], align 8
23+
@a8 = external local_unnamed_addr addrspace(2) global [4 x i32], align 4
24+
25+
; CHECK: define void @f(
26+
define void @f(ptr %dst) {
27+
entry:
28+
%CB.cb_h.i.i = tail call target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 1, i32 0, i1 false, ptr null)
29+
store target("dx.CBuffer", target("dx.Layout", %__cblayout_CB, 708, 0, 48, 112, 176, 224, 272, 288, 320)) %CB.cb_h.i.i, ptr @CB.cb, align 4
30+
31+
%a1.copy = alloca [3 x float], align 4
32+
%a2.copy = alloca [2 x <3 x double>], align 32
33+
%a3.copy = alloca [2 x [2 x half]], align 2
34+
%a4.copy = alloca [3 x i64], align 8
35+
%a5.copy = alloca [3 x [2 x <2 x i32>]], align 16
36+
%a6.copy = alloca [1 x i16], align 2
37+
%a7.copy = alloca [2 x i64], align 8
38+
%a8.copy = alloca [4 x i32], align 4
39+
40+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
41+
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 0)
42+
; CHECK: [[X:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
43+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY:%.*]], i32 0
44+
; CHECK: store float [[X]], ptr [[DEST]], align 4
45+
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 1)
46+
; CHECK: [[Y:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
47+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 4
48+
; CHECK: store float [[Y]], ptr [[DEST]], align 4
49+
; CHECK: [[LOAD:%.*]] = call { float, float, float, float } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 2)
50+
; CHECK: [[Z:%.*]] = extractvalue { float, float, float, float } [[LOAD]], 0
51+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A1_COPY]], i32 8
52+
; CHECK: store float [[Z]], ptr [[DEST]], align 4
53+
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a1.copy, ptr addrspace(2) align 4 @a1, i32 12, i1 false)
54+
55+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
56+
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 3)
57+
; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
58+
; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
59+
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 4)
60+
; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
61+
; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
62+
; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
63+
; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
64+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY:%.*]], i32 0
65+
; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
66+
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 5)
67+
; CHECK: [[X:%.*]] = extractvalue { double, double } [[LOAD]], 0
68+
; CHECK: [[Y:%.*]] = extractvalue { double, double } [[LOAD]], 1
69+
; CHECK: [[LOAD:%.*]] = call { double, double } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 6)
70+
; CHECK: [[Z:%.*]] = extractvalue { double, double } [[LOAD]], 0
71+
; CHECK: [[UPTO0:%.*]] = insertelement <3 x double> poison, double [[X]], i32 0
72+
; CHECK: [[UPTO1:%.*]] = insertelement <3 x double> [[UPTO0]], double [[Y]], i32 1
73+
; CHECK: [[UPTO2:%.*]] = insertelement <3 x double> [[UPTO1]], double [[Z]], i32 2
74+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A2_COPY]], i32 32
75+
; CHECK: store <3 x double> [[UPTO2]], ptr [[DEST]], align 32
76+
call void @llvm.memcpy.p0.p2.i32(ptr align 32 %a2.copy, ptr addrspace(2) align 32 @a2, i32 64, i1 false)
77+
78+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
79+
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 7)
80+
; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
81+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY:%.*]], i32 0
82+
; CHECK: store half [[X]], ptr [[DEST]], align 2
83+
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 8)
84+
; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
85+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 2
86+
; CHECK: store half [[Y]], ptr [[DEST]], align 2
87+
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 9)
88+
; CHECK: [[X:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
89+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 4
90+
; CHECK: store half [[X]], ptr [[DEST]], align 2
91+
; CHECK: [[LOAD:%.*]] = call { half, half, half, half, half, half, half, half } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 10)
92+
; CHECK: [[Y:%.*]] = extractvalue { half, half, half, half, half, half, half, half } [[LOAD]], 0
93+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A3_COPY]], i32 6
94+
; CHECK: store half [[Y]], ptr [[DEST]], align 2
95+
call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a3.copy, ptr addrspace(2) align 2 @a3, i32 8, i1 false)
96+
97+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
98+
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 11)
99+
; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
100+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY:%.*]], i32 0
101+
; CHECK: store i64 [[X]], ptr [[DEST]], align 8
102+
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 12)
103+
; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
104+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 8
105+
; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
106+
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 13)
107+
; CHECK: [[Z:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
108+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A4_COPY]], i32 16
109+
; CHECK: store i64 [[Z]], ptr [[DEST]], align 8
110+
call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a4.copy, ptr addrspace(2) align 8 @a4, i32 24, i1 false)
111+
112+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
113+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 14)
114+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
115+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
116+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
117+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
118+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY:%.*]], i32 0
119+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
120+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 15)
121+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
122+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
123+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
124+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
125+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 8
126+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
127+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 16)
128+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
129+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
130+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
131+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
132+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 16
133+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
134+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
135+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
136+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
137+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
138+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
139+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 24
140+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
141+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
142+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
143+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
144+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
145+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
146+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 32
147+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
148+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
149+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
150+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 1
151+
; CHECK: [[UPTO0:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i32 0
152+
; CHECK: [[UPTO1:%.*]] = insertelement <2 x i32> [[UPTO0]], i32 [[Y]], i32 1
153+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A5_COPY]], i32 40
154+
; CHECK: store <2 x i32> [[UPTO1]], ptr [[DEST]], align 8
155+
call void @llvm.memcpy.p0.p2.i32(ptr align 16 %a5.copy, ptr addrspace(2) align 16 @a5, i32 48, i1 false)
156+
157+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
158+
; CHECK: [[LOAD:%.*]] = call { i16, i16, i16, i16, i16, i16, i16, i16 } @llvm.dx.resource.load.cbufferrow.8.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 17)
159+
; CHECK: [[X:%.*]] = extractvalue { i16, i16, i16, i16, i16, i16, i16, i16 } [[LOAD]], 0
160+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A6_COPY:%.*]], i32 0
161+
; CHECK: store i16 [[X]], ptr [[DEST]], align 2
162+
call void @llvm.memcpy.p0.p2.i32(ptr align 2 %a6.copy, ptr addrspace(2) align 2 @a6, i32 2, i1 false)
163+
164+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
165+
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 18)
166+
; CHECK: [[X:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
167+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY:%.*]], i32 0
168+
; CHECK: store i64 [[X]], ptr [[DEST]], align 8
169+
; CHECK: [[LOAD:%.*]] = call { i64, i64 } @llvm.dx.resource.load.cbufferrow.2.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 19)
170+
; CHECK: [[Y:%.*]] = extractvalue { i64, i64 } [[LOAD]], 0
171+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A7_COPY]], i32 8
172+
; CHECK: store i64 [[Y]], ptr [[DEST]], align 8
173+
call void @llvm.memcpy.p0.p2.i32(ptr align 8 %a7.copy, ptr addrspace(2) align 8 @a7, i32 16, i1 false)
174+
175+
; CHECK: [[CB:%.*]] = load target("dx.CBuffer", {{.*}})), ptr @CB.cb, align 4
176+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 20)
177+
; CHECK: [[X:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
178+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY:%.*]], i32 0
179+
; CHECK: store i32 [[X]], ptr [[DEST]], align 4
180+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 21)
181+
; CHECK: [[Y:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
182+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 4
183+
; CHECK: store i32 [[Y]], ptr [[DEST]], align 4
184+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 22)
185+
; CHECK: [[Z:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
186+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 8
187+
; CHECK: store i32 [[Z]], ptr [[DEST]], align 4
188+
; CHECK: [[LOAD:%.*]] = call { i32, i32, i32, i32 } @llvm.dx.resource.load.cbufferrow.4.{{.*}}(target("dx.CBuffer", {{.*}})) [[CB]], i32 23)
189+
; CHECK: [[W:%.*]] = extractvalue { i32, i32, i32, i32 } [[LOAD]], 0
190+
; CHECK: [[DEST:%.*]] = getelementptr inbounds i8, ptr [[A8_COPY]], i32 12
191+
; CHECK: store i32 [[W]], ptr [[DEST]], align 4
192+
call void @llvm.memcpy.p0.p2.i32(ptr align 4 %a8.copy, ptr addrspace(2) align 4 @a8, i32 16, i1 false)
193+
194+
ret void
195+
}
196+
197+
declare void @llvm.memcpy.p0.p2.i32(ptr noalias writeonly captures(none), ptr addrspace(2) noalias readonly captures(none), i32, i1 immarg)
198+
199+
; CHECK-NOT: !hlsl.cbs =
200+
!hlsl.cbs = !{!0}
201+
202+
!0 = !{ptr @CB.cb, ptr addrspace(2) @a1, ptr addrspace(2) @a2, ptr addrspace(2) @a3, ptr addrspace(2) @a4, ptr addrspace(2) @a5, ptr addrspace(2) @a6, ptr addrspace(2) @a7, ptr addrspace(2) @a8}
203+
!1 = !{i32 0, i32 2}
204+
!2 = !{}

0 commit comments

Comments
 (0)