11
11
#include " llvm/Frontend/HLSL/CBuffer.h"
12
12
#include " llvm/Frontend/HLSL/HLSLResource.h"
13
13
#include " llvm/IR/IRBuilder.h"
14
+ #include " llvm/IR/IntrinsicInst.h"
14
15
#include " llvm/IR/IntrinsicsDirectX.h"
15
16
#include " llvm/InitializePasses.h"
16
17
#include " llvm/Pass.h"
18
+ #include " llvm/Support/FormatVariadic.h"
17
19
#include " llvm/Transforms/Utils/Local.h"
18
20
19
21
#define DEBUG_TYPE " dxil-cbuffer-access"
@@ -54,71 +56,106 @@ struct CBufferRowIntrin {
54
56
}
55
57
}
56
58
};
57
- } // namespace
58
59
59
- static size_t getOffsetForCBufferGEP (GEPOperator *GEP, GlobalVariable *Global,
60
- const DataLayout &DL) {
61
- // Since we should always have a constant offset, we should only ever have a
62
- // single GEP of indirection from the Global.
63
- assert (GEP->getPointerOperand () == Global &&
64
- " Indirect access to resource handle" );
60
+ // Helper for creating CBuffer handles and loading data from them
61
+ struct CBufferResource {
62
+ GlobalVariable *GVHandle;
63
+ GlobalVariable *Member;
64
+ size_t MemberOffset;
65
65
66
- APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
67
- bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
68
- (void )Success;
69
- assert (Success && " Offsets into cbuffer globals must be constant" );
66
+ LoadInst *Handle;
70
67
71
- if (auto *ATy = dyn_cast<ArrayType>(Global->getValueType ()))
72
- ConstantOffset = hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
68
+ CBufferResource (GlobalVariable *GVHandle, GlobalVariable *Member,
69
+ size_t MemberOffset)
70
+ : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
73
71
74
- return ConstantOffset.getZExtValue ();
75
- }
72
+ const DataLayout &getDataLayout () { return GVHandle->getDataLayout (); }
73
+ Type *getValueType () { return Member->getValueType (); }
74
+ iterator_range<ConstantDataSequential::user_iterator> users () {
75
+ return Member->users ();
76
+ }
77
+
78
+ // / Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
79
+ // / `Val` can either be Member itself, or a GEP of a constant offset from
80
+ // / Member
81
+ size_t getOffsetForCBufferGEP (Value *Val) {
82
+ assert (isa<PointerType>(Val->getType ()) &&
83
+ " Expected a pointer-typed value" );
84
+
85
+ if (Val == Member)
86
+ return 0 ;
87
+
88
+ if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
89
+ // Since we should always have a constant offset, we should only ever have
90
+ // a single GEP of indirection from the Global.
91
+ assert (GEP->getPointerOperand () == Member &&
92
+ " Indirect access to resource handle" );
93
+
94
+ const DataLayout &DL = getDataLayout ();
95
+ APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
96
+ bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
97
+ (void )Success;
98
+ assert (Success && " Offsets into cbuffer globals must be constant" );
99
+
100
+ if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType ()))
101
+ ConstantOffset =
102
+ hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
103
+
104
+ return ConstantOffset.getZExtValue ();
105
+ }
76
106
77
- // / Replace access via cbuffer global with a load from the cbuffer handle
78
- // / itself.
79
- static void replaceAccess (LoadInst *LI, GlobalVariable *Global,
80
- GlobalVariable *HandleGV, size_t BaseOffset,
81
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
82
- const DataLayout &DL = HandleGV->getDataLayout ();
107
+ llvm_unreachable (" Expected Val to be a GlobalVariable or GEP" );
108
+ }
83
109
84
- size_t Offset = BaseOffset;
85
- if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand ()))
86
- Offset += getOffsetForCBufferGEP (GEP, Global, DL);
87
- else if (LI->getPointerOperand () != Global)
88
- llvm_unreachable (" Load instruction doesn't reference cbuffer global" );
110
+ // / Create a handle for this cbuffer resource using the IRBuilder `Builder`
111
+ // / and sets the handle as the current one to use for subsequent calls to
112
+ // / `loadValue`
113
+ void createAndSetCurrentHandle (IRBuilder<> &Builder) {
114
+ Handle = Builder.CreateLoad (GVHandle->getValueType (), GVHandle,
115
+ GVHandle->getName ());
116
+ }
89
117
90
- IRBuilder<> Builder (LI);
91
- auto *Handle = Builder.CreateLoad (HandleGV->getValueType (), HandleGV,
92
- HandleGV->getName ());
93
-
94
- Type *Ty = LI->getType ();
95
- CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
96
- // The cbuffer consists of some number of 16-byte rows.
97
- unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
98
- unsigned int CurrentIndex =
99
- (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
100
-
101
- auto *CBufLoad = Builder.CreateIntrinsic (
102
- Intrin.RetTy , Intrin.IID ,
103
- {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
104
- LI->getName ());
105
- auto *Elt =
106
- Builder.CreateExtractValue (CBufLoad, {CurrentIndex++}, LI->getName ());
107
-
108
- Value *Result = nullptr ;
109
- unsigned int Remaining =
110
- ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
111
- if (Remaining == 0 ) {
112
- // We only have a single element, so we're done.
113
- Result = Elt;
114
-
115
- // However, if we loaded a <1 x T>, then we need to adjust the type here.
116
- if (auto *VT = dyn_cast<FixedVectorType>(LI->getType ())) {
117
- assert (VT->getNumElements () == 1 && " Can't have multiple elements here" );
118
- Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
119
- Builder.getInt32 (0 ));
118
+ // / Load a value of type `Ty` at offset `Offset` using the handle from the
119
+ // / last call to `createAndSetCurrentHandle`
120
+ Value *loadValue (IRBuilder<> &Builder, Type *Ty, size_t Offset,
121
+ const Twine &Name = " " ) {
122
+ assert (Handle &&
123
+ " Expected a handle for this cbuffer global resource to be created "
124
+ " before loading a value from it" );
125
+ const DataLayout &DL = getDataLayout ();
126
+
127
+ size_t TargetOffset = MemberOffset + Offset;
128
+ CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
129
+ // The cbuffer consists of some number of 16-byte rows.
130
+ unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
131
+ unsigned int CurrentIndex =
132
+ (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
133
+
134
+ auto *CBufLoad = Builder.CreateIntrinsic (
135
+ Intrin.RetTy , Intrin.IID ,
136
+ {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
137
+ Name + " .load" );
138
+ auto *Elt = Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
139
+ Name + " .extract" );
140
+
141
+ Value *Result = nullptr ;
142
+ unsigned int Remaining =
143
+ ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
144
+
145
+ if (Remaining == 0 ) {
146
+ // We only have a single element, so we're done.
147
+ Result = Elt;
148
+
149
+ // However, if we loaded a <1 x T>, then we need to adjust the type here.
150
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
151
+ assert (VT->getNumElements () == 1 &&
152
+ " Can't have multiple elements here" );
153
+ Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
154
+ Builder.getInt32 (0 ), Name);
155
+ }
156
+ return Result;
120
157
}
121
- } else {
158
+
122
159
// Walk each element and extract it, wrapping to new rows as needed.
123
160
SmallVector<Value *> Extracts{Elt};
124
161
while (Remaining--) {
@@ -128,40 +165,138 @@ static void replaceAccess(LoadInst *LI, GlobalVariable *Global,
128
165
CBufLoad = Builder.CreateIntrinsic (
129
166
Intrin.RetTy , Intrin.IID ,
130
167
{Handle, ConstantInt::get (Builder.getInt32Ty (), ++CurrentRow)},
131
- nullptr , LI-> getName () );
168
+ nullptr , Name + " .load " );
132
169
133
170
Extracts.push_back (Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
134
- LI-> getName () ));
171
+ Name + " .extract " ));
135
172
}
136
173
137
174
// Finally, we build up the original loaded value.
138
175
Result = PoisonValue::get (Ty);
139
176
for (int I = 0 , E = Extracts.size (); I < E; ++I)
140
177
Result =
141
- Builder.CreateInsertElement (Result, Extracts[I], Builder.getInt32 (I));
178
+ Builder.CreateInsertElement (Result, Extracts[I], Builder.getInt32 (I),
179
+ Name + formatv (" .upto{}" , I));
180
+ return Result;
142
181
}
182
+ };
143
183
184
+ } // namespace
185
+
186
+ // / Replace load via cbuffer global with a load from the cbuffer handle itself.
187
+ static void replaceLoad (LoadInst *LI, CBufferResource &CBR,
188
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
189
+ size_t Offset = CBR.getOffsetForCBufferGEP (LI->getPointerOperand ());
190
+ IRBuilder<> Builder (LI);
191
+ CBR.createAndSetCurrentHandle (Builder);
192
+ Value *Result = CBR.loadValue (Builder, LI->getType (), Offset, LI->getName ());
144
193
LI->replaceAllUsesWith (Result);
145
194
DeadInsts.push_back (LI);
146
195
}
147
196
148
- static void replaceAccessesWithHandle (GlobalVariable *Global,
149
- GlobalVariable *HandleGV,
150
- size_t BaseOffset) {
197
+ // / This function recursively copies N array elements from the cbuffer resource
198
+ // / CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
199
+ // / arrays into a sequence of scalar/vector extracts and stores.
200
+ static void copyArrayElemsForMemCpy (IRBuilder<> &Builder, MemCpyInst *MCI,
201
+ CBufferResource &CBR, ArrayType *ArrTy,
202
+ size_t ArrOffset, size_t N,
203
+ const Twine &Name = " " ) {
204
+ const DataLayout &DL = MCI->getDataLayout ();
205
+ Type *ElemTy = ArrTy->getElementType ();
206
+ size_t ElemTySize = DL.getTypeAllocSize (ElemTy);
207
+ for (unsigned I = 0 ; I < N; ++I) {
208
+ size_t Offset = ArrOffset + I * ElemTySize;
209
+
210
+ // Recursively copy nested arrays
211
+ if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
212
+ copyArrayElemsForMemCpy (Builder, MCI, CBR, ElemArrTy, Offset,
213
+ ElemArrTy->getNumElements (), Name);
214
+ continue ;
215
+ }
216
+
217
+ // Load CBuffer value and store it in Dest
218
+ APInt CBufArrayOffset (
219
+ DL.getIndexTypeSizeInBits (MCI->getSource ()->getType ()), Offset);
220
+ CBufArrayOffset =
221
+ hlsl::translateCBufArrayOffset (DL, CBufArrayOffset, ArrTy);
222
+ Value *CBufferVal =
223
+ CBR.loadValue (Builder, ElemTy, CBufArrayOffset.getZExtValue (), Name);
224
+ Value *GEP =
225
+ Builder.CreateInBoundsGEP (Builder.getInt8Ty (), MCI->getDest (),
226
+ {Builder.getInt32 (Offset)}, Name + " .dest" );
227
+ Builder.CreateStore (CBufferVal, GEP, MCI->isVolatile ());
228
+ }
229
+ }
230
+
231
+ // / Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
232
+ // / itself. Assumes the cbuffer global is an array, and the length of bytes to
233
+ // / copy is divisible by array element allocation size.
234
+ // / The memcpy source must also be a direct cbuffer global reference, not a GEP.
235
+ static void replaceMemCpy (MemCpyInst *MCI, CBufferResource &CBR) {
236
+
237
+ ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType ());
238
+ assert (ArrTy && " MemCpy lowering is only supported for array types" );
239
+
240
+ // This assumption vastly simplifies the implementation
241
+ if (MCI->getSource () != CBR.Member )
242
+ reportFatalUsageError (
243
+ " Expected MemCpy source to be a cbuffer global variable" );
244
+
245
+ ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength ());
246
+ uint64_t ByteLength = Length->getZExtValue ();
247
+
248
+ // If length to copy is zero, no memcpy is needed
249
+ if (ByteLength == 0 ) {
250
+ MCI->eraseFromParent ();
251
+ return ;
252
+ }
253
+
254
+ const DataLayout &DL = CBR.getDataLayout ();
255
+
256
+ Type *ElemTy = ArrTy->getElementType ();
257
+ size_t ElemSize = DL.getTypeAllocSize (ElemTy);
258
+ assert (ByteLength % ElemSize == 0 &&
259
+ " Length of bytes to MemCpy must be divisible by allocation size of "
260
+ " source/destination array elements" );
261
+ size_t ElemsToCpy = ByteLength / ElemSize;
262
+
263
+ IRBuilder<> Builder (MCI);
264
+ CBR.createAndSetCurrentHandle (Builder);
265
+
266
+ copyArrayElemsForMemCpy (Builder, MCI, CBR, ArrTy, 0 , ElemsToCpy,
267
+ " memcpy." + MCI->getDest ()->getName () + " ." +
268
+ MCI->getSource ()->getName ());
269
+
270
+ MCI->eraseFromParent ();
271
+ }
272
+
273
+ static void replaceAccessesWithHandle (CBufferResource &CBR) {
151
274
SmallVector<WeakTrackingVH> DeadInsts;
152
275
153
- SmallVector<User *> ToProcess{Global-> users ()};
276
+ SmallVector<User *> ToProcess{CBR. users ()};
154
277
while (!ToProcess.empty ()) {
155
278
User *Cur = ToProcess.pop_back_val ();
156
279
157
280
// If we have a load instruction, replace the access.
158
281
if (auto *LI = dyn_cast<LoadInst>(Cur)) {
159
- replaceAccess (LI, Global, HandleGV, BaseOffset, DeadInsts);
282
+ replaceLoad (LI, CBR, DeadInsts);
283
+ continue ;
284
+ }
285
+
286
+ // If we have a memcpy instruction, replace it with multiple accesses and
287
+ // subsequent stores to the destination
288
+ if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
289
+ replaceMemCpy (MCI, CBR);
160
290
continue ;
161
291
}
162
292
163
293
// Otherwise, walk users looking for a load...
164
- ToProcess.append (Cur->user_begin (), Cur->user_end ());
294
+ if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
295
+ ToProcess.append (Cur->user_begin (), Cur->user_end ());
296
+ continue ;
297
+ }
298
+
299
+ llvm_unreachable (" Unexpected user of Global" );
165
300
}
166
301
RecursivelyDeleteTriviallyDeadInstructions (DeadInsts);
167
302
}
@@ -173,7 +308,8 @@ static bool replaceCBufferAccesses(Module &M) {
173
308
174
309
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
175
310
for (const hlsl::CBufferMember &Member : Mapping.Members ) {
176
- replaceAccessesWithHandle (Member.GV , Mapping.Handle , Member.Offset );
311
+ CBufferResource CBR (Mapping.Handle , Member.GV , Member.Offset );
312
+ replaceAccessesWithHandle (CBR);
177
313
Member.GV ->removeFromParent ();
178
314
}
179
315
0 commit comments