17
17
#include " llvm/InitializePasses.h"
18
18
#include " llvm/Pass.h"
19
19
#include " llvm/Support/ErrorHandling.h"
20
+ #include " llvm/Support/FormatVariadic.h"
20
21
#include " llvm/Transforms/Utils/Local.h"
21
22
22
23
#define DEBUG_TYPE " dxil-cbuffer-access"
@@ -57,109 +58,236 @@ struct CBufferRowIntrin {
57
58
}
58
59
}
59
60
};
60
- } // namespace
61
61
62
- static size_t getOffsetForCBufferGEP (GEPOperator *GEP, GlobalVariable *Global,
63
- const DataLayout &DL) {
64
- // Since we should always have a constant offset, we should only ever have a
65
- // single GEP of indirection from the Global.
66
- assert (GEP->getPointerOperand () == Global &&
67
- " Indirect access to resource handle" );
62
+ // Helper for creating CBuffer handles and loading data from them
63
+ struct CBufferResource {
64
+ GlobalVariable *GVHandle;
65
+ GlobalVariable *Member;
66
+ size_t MemberOffset;
68
67
69
- APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
70
- bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
71
- (void )Success;
72
- assert (Success && " Offsets into cbuffer globals must be constant" );
68
+ LoadInst *Handle;
73
69
74
- if (auto *ATy = dyn_cast<ArrayType>(Global->getValueType ()))
75
- ConstantOffset = hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
70
+ CBufferResource (GlobalVariable *GVHandle, GlobalVariable *Member,
71
+ size_t MemberOffset)
72
+ : GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
76
73
77
- return ConstantOffset.getZExtValue ();
78
- }
74
+ const DataLayout &getDataLayout () { return GVHandle->getDataLayout (); }
75
+ Type *getValueType () { return Member->getValueType (); }
76
+ iterator_range<ConstantDataSequential::user_iterator> users () {
77
+ return Member->users ();
78
+ }
79
79
80
- // / Replace access via cbuffer global with a load from the cbuffer handle
81
- // / itself.
82
- static void replaceAccess (LoadInst *LI, GlobalVariable *Global,
83
- GlobalVariable *HandleGV, size_t BaseOffset,
84
- SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
85
- const DataLayout &DL = HandleGV->getDataLayout ();
80
+ // / Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
81
+ // / `Val` can either be Member itself, or a GEP of a constant offset from
82
+ // / Member
83
+ size_t getOffsetForCBufferGEP (Value *Val) {
84
+ assert (isa<PointerType>(Val->getType ()) &&
85
+ " Expected a pointer-typed value" );
86
+
87
+ if (Val == Member)
88
+ return 0 ;
89
+
90
+ if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
91
+ // Since we should always have a constant offset, we should only ever have
92
+ // a single GEP of indirection from the Global.
93
+ assert (GEP->getPointerOperand () == Member &&
94
+ " Indirect access to resource handle" );
95
+
96
+ const DataLayout &DL = getDataLayout ();
97
+ APInt ConstantOffset (DL.getIndexTypeSizeInBits (GEP->getType ()), 0 );
98
+ bool Success = GEP->accumulateConstantOffset (DL, ConstantOffset);
99
+ (void )Success;
100
+ assert (Success && " Offsets into cbuffer globals must be constant" );
101
+
102
+ if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType ()))
103
+ ConstantOffset =
104
+ hlsl::translateCBufArrayOffset (DL, ConstantOffset, ATy);
105
+
106
+ return ConstantOffset.getZExtValue ();
107
+ }
86
108
87
- size_t Offset = BaseOffset;
88
- if (auto *GEP = dyn_cast<GEPOperator>(LI->getPointerOperand ()))
89
- Offset += getOffsetForCBufferGEP (GEP, Global, DL);
90
- else if (LI->getPointerOperand () != Global)
91
- llvm_unreachable (" Load instruction doesn't reference cbuffer global" );
109
+ llvm_unreachable (" Expected Val to be a GlobalVariable or GEP" );
110
+ }
92
111
93
- IRBuilder<> Builder (LI);
94
- auto *Handle = Builder.CreateLoad (HandleGV->getValueType (), HandleGV,
95
- HandleGV->getName ());
96
-
97
- Type *Ty = LI->getType ();
98
- CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
99
- // The cbuffer consists of some number of 16-byte rows.
100
- unsigned int CurrentRow = Offset / hlsl::CBufferRowSizeInBytes;
101
- unsigned int CurrentIndex =
102
- (Offset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
103
-
104
- auto *CBufLoad = Builder.CreateIntrinsic (
105
- Intrin.RetTy , Intrin.IID ,
106
- {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
107
- LI->getName ());
108
- auto *Elt =
109
- Builder.CreateExtractValue (CBufLoad, {CurrentIndex++}, LI->getName ());
110
-
111
- Value *Result = nullptr ;
112
- unsigned int Remaining =
113
- ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
114
- if (Remaining == 0 ) {
115
- // We only have a single element, so we're done.
116
- Result = Elt;
117
-
118
- // However, if we loaded a <1 x T>, then we need to adjust the type here.
119
- if (auto *VT = dyn_cast<FixedVectorType>(LI->getType ())) {
120
- assert (VT->getNumElements () == 1 && " Can't have multiple elements here" );
121
- Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
122
- Builder.getInt32 (0 ));
123
- }
124
- } else {
125
- // Walk each element and extract it, wrapping to new rows as needed.
126
- SmallVector<Value *> Extracts{Elt};
127
- while (Remaining--) {
128
- CurrentIndex %= Intrin.NumElts ;
129
-
130
- if (CurrentIndex == 0 )
131
- CBufLoad = Builder.CreateIntrinsic (
132
- Intrin.RetTy , Intrin.IID ,
133
- {Handle, ConstantInt::get (Builder.getInt32Ty (), ++CurrentRow)},
134
- nullptr , LI->getName ());
135
-
136
- Extracts.push_back (Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
137
- LI->getName ()));
112
+ // / Create a handle for this cbuffer resource using the IRBuilder `Builder`
113
+ // / and sets the handle as the current one to use for subsequent calls to
114
+ // / `loadValue`
115
+ void createAndSetCurrentHandle (IRBuilder<> &Builder) {
116
+ Handle = Builder.CreateLoad (GVHandle->getValueType (), GVHandle,
117
+ GVHandle->getName ());
118
+ }
119
+
120
+ // / Load a value of type `Ty` at offset `Offset` using the handle from the
121
+ // / last call to `createAndSetCurrentHandle`
122
+ Value *loadValue (IRBuilder<> &Builder, Type *Ty, size_t Offset,
123
+ const Twine &Name = " " ) {
124
+ assert (Handle &&
125
+ " Expected a handle for this cbuffer global resource to be created "
126
+ " before loading a value from it" );
127
+ const DataLayout &DL = getDataLayout ();
128
+
129
+ size_t TargetOffset = MemberOffset + Offset;
130
+ CBufferRowIntrin Intrin (DL, Ty->getScalarType ());
131
+ // The cbuffer consists of some number of 16-byte rows.
132
+ unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
133
+ unsigned int CurrentIndex =
134
+ (TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize ;
135
+
136
+ auto *CBufLoad = Builder.CreateIntrinsic (
137
+ Intrin.RetTy , Intrin.IID ,
138
+ {Handle, ConstantInt::get (Builder.getInt32Ty (), CurrentRow)}, nullptr ,
139
+ Name + " .load" );
140
+ auto *Elt = Builder.CreateExtractValue (CBufLoad, {CurrentIndex++},
141
+ Name + " .extract" );
142
+
143
+ Value *Result = nullptr ;
144
+ unsigned int Remaining =
145
+ ((DL.getTypeSizeInBits (Ty) / 8 ) / Intrin.EltSize ) - 1 ;
146
+ if (Remaining == 0 ) {
147
+ // We only have a single element, so we're done.
148
+ Result = Elt;
149
+
150
+ // However, if we loaded a <1 x T>, then we need to adjust the type here.
151
+ if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
152
+ assert (VT->getNumElements () == 1 &&
153
+ " Can't have multiple elements here" );
154
+ Result = Builder.CreateInsertElement (PoisonValue::get (VT), Result,
155
+ Builder.getInt32 (0 ), Name);
156
+ }
157
+ } else {
158
+ // Walk each element and extract it, wrapping to new rows as needed.
159
+ SmallVector<Value *> Extracts{Elt};
160
+ while (Remaining--) {
161
+ CurrentIndex %= Intrin.NumElts ;
162
+
163
+ if (CurrentIndex == 0 )
164
+ CBufLoad = Builder.CreateIntrinsic (
165
+ Intrin.RetTy , Intrin.IID ,
166
+ {Handle, ConstantInt::get (Builder.getInt32Ty (), ++CurrentRow)},
167
+ nullptr , Name + " .load" );
168
+
169
+ Extracts.push_back (Builder.CreateExtractValue (
170
+ CBufLoad, {CurrentIndex++}, Name + " .extract" ));
171
+ }
172
+
173
+ // Finally, we build up the original loaded value.
174
+ Result = PoisonValue::get (Ty);
175
+ for (int I = 0 , E = Extracts.size (); I < E; ++I)
176
+ Result = Builder.CreateInsertElement (Result, Extracts[I],
177
+ Builder.getInt32 (I),
178
+ Name + formatv (" .upto{}" , I));
138
179
}
139
180
140
- // Finally, we build up the original loaded value.
141
- Result = PoisonValue::get (Ty);
142
- for (int I = 0 , E = Extracts.size (); I < E; ++I)
143
- Result =
144
- Builder.CreateInsertElement (Result, Extracts[I], Builder.getInt32 (I));
181
+ return Result;
145
182
}
183
+ };
146
184
185
+ } // namespace
186
+
187
+ // / Replace load via cbuffer global with a load from the cbuffer handle itself.
188
+ static void replaceLoad (LoadInst *LI, CBufferResource &CBR,
189
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
190
+ size_t Offset = CBR.getOffsetForCBufferGEP (LI->getPointerOperand ());
191
+ IRBuilder<> Builder (LI);
192
+ CBR.createAndSetCurrentHandle (Builder);
193
+ Value *Result = CBR.loadValue (Builder, LI->getType (), Offset, LI->getName ());
147
194
LI->replaceAllUsesWith (Result);
148
195
DeadInsts.push_back (LI);
149
196
}
150
197
151
- static void replaceAccessesWithHandle (GlobalVariable *Global,
152
- GlobalVariable *HandleGV,
153
- size_t BaseOffset) {
198
+ // / Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
199
+ // / itself. Assumes the cbuffer global is an array, and the length of bytes to
200
+ // / copy is divisible by array element allocation size.
201
+ // / The memcpy source must also be a direct cbuffer global reference, not a GEP.
202
+ static void replaceMemCpy (MemCpyInst *MCI, CBufferResource &CBR,
203
+ SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
204
+
205
+ ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType ());
206
+ assert (ArrTy && " MemCpy lowering is only supported for array types" );
207
+
208
+ // This assumption vastly simplifies the implementation
209
+ if (MCI->getSource () != CBR.Member )
210
+ reportFatalUsageError (
211
+ " Expected MemCpy source to be a cbuffer global variable" );
212
+
213
+ const std::string Name = (" memcpy." + MCI->getDest ()->getName () + " ." +
214
+ MCI->getSource ()->getName ())
215
+ .str ();
216
+
217
+ ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength ());
218
+ uint64_t ByteLength = Length->getZExtValue ();
219
+
220
+ // If length to copy is zero, no memcpy is needed
221
+ if (ByteLength == 0 ) {
222
+ DeadInsts.push_back (MCI);
223
+ return ;
224
+ }
225
+
226
+ const DataLayout &DL = CBR.getDataLayout ();
227
+
228
+ Type *ElemTy = ArrTy->getElementType ();
229
+ size_t ElemSize = DL.getTypeAllocSize (ElemTy);
230
+ assert (ByteLength % ElemSize == 0 &&
231
+ " Length of bytes to MemCpy must be divisible by allocation size of "
232
+ " source/destination array elements" );
233
+ size_t ElemsToCpy = ByteLength / ElemSize;
234
+
235
+ IRBuilder<> Builder (MCI);
236
+ CBR.createAndSetCurrentHandle (Builder);
237
+
238
+ auto CopyElemsImpl = [&Builder, &MCI, &Name, &CBR,
239
+ &DL](const auto &Self, ArrayType *ArrTy,
240
+ size_t ArrOffset, size_t N) -> void {
241
+ Type *ElemTy = ArrTy->getElementType ();
242
+ size_t ElemTySize = DL.getTypeAllocSize (ElemTy);
243
+ for (unsigned I = 0 ; I < N; ++I) {
244
+ size_t Offset = ArrOffset + I * ElemTySize;
245
+
246
+ // Recursively copy nested arrays
247
+ if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
248
+ Self (Self, ElemArrTy, Offset, ElemArrTy->getNumElements ());
249
+ continue ;
250
+ }
251
+
252
+ // Load CBuffer value and store it in Dest
253
+ APInt CBufArrayOffset (
254
+ DL.getIndexTypeSizeInBits (MCI->getSource ()->getType ()), Offset);
255
+ CBufArrayOffset =
256
+ hlsl::translateCBufArrayOffset (DL, CBufArrayOffset, ArrTy);
257
+ Value *CBufferVal =
258
+ CBR.loadValue (Builder, ElemTy, CBufArrayOffset.getZExtValue (), Name);
259
+ Value *GEP =
260
+ Builder.CreateInBoundsGEP (Builder.getInt8Ty (), MCI->getDest (),
261
+ {Builder.getInt32 (Offset)}, Name + " .dest" );
262
+ Builder.CreateStore (CBufferVal, GEP, MCI->isVolatile ());
263
+ }
264
+ };
265
+ auto CopyElems = [&CopyElemsImpl](ArrayType *ArrTy, size_t N) -> void {
266
+ CopyElemsImpl (CopyElemsImpl, ArrTy, 0 , N);
267
+ };
268
+
269
+ CopyElems (ArrTy, ElemsToCpy);
270
+
271
+ MCI->eraseFromParent ();
272
+ }
273
+
274
+ static void replaceAccessesWithHandle (CBufferResource &CBR) {
154
275
SmallVector<WeakTrackingVH> DeadInsts;
155
276
156
- SmallVector<User *> ToProcess{Global-> users ()};
277
+ SmallVector<User *> ToProcess{CBR. users ()};
157
278
while (!ToProcess.empty ()) {
158
279
User *Cur = ToProcess.pop_back_val ();
159
280
160
281
// If we have a load instruction, replace the access.
161
282
if (auto *LI = dyn_cast<LoadInst>(Cur)) {
162
- replaceAccess (LI, Global, HandleGV, BaseOffset, DeadInsts);
283
+ replaceLoad (LI, CBR, DeadInsts);
284
+ continue ;
285
+ }
286
+
287
+ // If we have a memcpy instruction, replace it with multiple accesses and
288
+ // subsequent stores to the destination
289
+ if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
290
+ replaceMemCpy (MCI, CBR, DeadInsts);
163
291
continue ;
164
292
}
165
293
@@ -181,7 +309,8 @@ static bool replaceCBufferAccesses(Module &M) {
181
309
182
310
for (const hlsl::CBufferMapping &Mapping : *CBufMD)
183
311
for (const hlsl::CBufferMember &Member : Mapping.Members ) {
184
- replaceAccessesWithHandle (Member.GV , Mapping.Handle , Member.Offset );
312
+ CBufferResource CBR (Mapping.Handle , Member.GV , Member.Offset );
313
+ replaceAccessesWithHandle (CBR);
185
314
Member.GV ->removeFromParent ();
186
315
}
187
316
0 commit comments