@@ -51,8 +51,8 @@ bool PromoteStatelessToBindless::runOnFunction(Function& F)
51
51
if (HasStackCall)
52
52
return false ;
53
53
54
- m_AccessToSrcPtrMap .clear ();
55
- m_AddressUsedSrcPtrMap .clear ();
54
+ m_SrcPtrNeedStatelessAccess .clear ();
55
+ m_SrcPtrToAccessMap .clear ();
56
56
if (!ClContext->m_InternalOptions .UseBindlessPrintf )
57
57
{
58
58
CheckPrintfBuffer (F);
@@ -83,6 +83,8 @@ void PromoteStatelessToBindless::CheckPrintfBuffer(Function& F)
83
83
84
84
void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap (Instruction* inst, Value* resourcePtr)
85
85
{
86
+ bool canPromoteAccess = true ;
87
+ auto modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData ();
86
88
unsigned addrSpace = resourcePtr->getType ()->getPointerAddressSpace ();
87
89
88
90
if (addrSpace != ADDRESS_SPACE_GLOBAL && addrSpace != ADDRESS_SPACE_CONSTANT)
@@ -103,14 +105,16 @@ void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap(Instruction* inst,
103
105
break ;
104
106
case GenISAIntrinsic::GenISA_intatomicrawA64:
105
107
// Ignore a buffer in this intrinsic, keep it stateless.
106
- return ;
108
+ canPromoteAccess = false ;
109
+ break ;
107
110
default :
108
111
IGC_ASSERT_MESSAGE (0 , " Unsupported Instruction" );
109
- return ;
112
+ canPromoteAccess = false ;
113
+ break ;
110
114
}
111
115
}
112
116
else
113
- return ;
117
+ canPromoteAccess = false ;
114
118
}
115
119
116
120
std::vector<Value*> tempList;
@@ -130,22 +134,36 @@ void PromoteStatelessToBindless::GetAccessInstToSrcPointerMap(Instruction* inst,
130
134
// printf buffer address (through atomic add), see printf implementation in
131
135
// OpenCLPrintfResolution.cpp. Currently keep printf implementation as stateless and
132
136
// thus skip printf buffer for now.
133
- return ;
137
+ canPromoteAccess = false ;
134
138
}
135
139
136
- m_promotedArgs.insert (cast<Argument>(srcPtr)->getArgNo ());
137
-
138
- // Save the instruction, which makes access (load/store/intrinsic) to the buffer
139
- m_AccessToSrcPtrMap[inst] = srcPtr;
140
- // Save the instruction, which generate an address of the buffer. This is the
141
- // instruction right before the last one. The last one has to be the buffer itself.
142
- if (tempList.size () > 1 )
140
+ if (modMD->compOpt .UseLegacyBindlessMode )
143
141
{
144
- m_AddressUsedSrcPtrMap[tempList[tempList.size ()-2 ]] = srcPtr;
142
+ if (!canPromoteAccess)
143
+ {
144
+ // In this case, the srcPtr is traced to a kernel arg, but the access instruction does not support
145
+ // bindless access, so we have to make all access stateless.
146
+ // Remove all access instructions of this srcPtr that may have been added in previous passes, to
147
+ // prevent promoting it to bindless.
148
+ m_SrcPtrNeedStatelessAccess.insert (srcPtr);
149
+ m_SrcPtrToAccessMap.erase (srcPtr);
150
+ return ;
151
+ }
152
+ else if (m_SrcPtrNeedStatelessAccess.count (srcPtr) != 0 )
153
+ {
154
+ return ;
155
+ }
145
156
}
146
- else
157
+
158
+ if (canPromoteAccess)
147
159
{
148
- m_AddressUsedSrcPtrMap[inst] = srcPtr;
160
+ // Save the instruction, which makes access (load/store/intrinsic) to the buffer
161
+ Value* accessInst = inst;
162
+ // Save the instruction, which generates an address of the buffer. This is the
163
+ // instruction right before the last one. The last one has to be the buffer itself.
164
+ Value* addrUsedInst = (tempList.size () > 1 ) ? tempList[tempList.size () - 2 ] : inst;
165
+
166
+ m_SrcPtrToAccessMap[srcPtr].push_back (std::make_pair (accessInst, addrUsedInst));
149
167
}
150
168
}
151
169
@@ -164,85 +182,95 @@ void PromoteStatelessToBindless::PromoteStatelessToBindlessBuffers(Function& F)
164
182
165
183
bool supportDynamicBTIsAllocation = ctx->platform .supportDynamicBTIsAllocation ();
166
184
167
- // Modify the reference to the buffer not through all users but only in instructions
168
- // which are used in accesing (load/store) the buffer.
169
- for (auto inst : m_AddressUsedSrcPtrMap)
185
+ for (auto iter : m_SrcPtrToAccessMap)
170
186
{
171
- Instruction* accessInst = cast<Instruction>(inst.first );
172
- Argument* srcPtr = cast<Argument>(inst.second );
173
-
174
- Value* nullSrcPtr = ConstantPointerNull::get (cast<PointerType>(srcPtr->getType ()));
175
- accessInst->replaceUsesOfWith (srcPtr, nullSrcPtr);
187
+ Argument* srcPtr = cast<Argument>(iter.first );
176
188
177
189
ArgAllocMD* argInfo = &resourceAlloc->argAllocMDList [srcPtr->getArgNo ()];
178
190
IGC_ASSERT_MESSAGE ((size_t )srcPtr->getArgNo () < resourceAlloc->argAllocMDList .size (), " ArgAllocMD List Out of Bounds" );
179
- // Update metadata to show bindless resource type
180
- argInfo->type = ResourceTypeEnum::BindlessUAVResourceType;
191
+
192
+ if (modMD->compOpt .UseLegacyBindlessMode )
193
+ {
194
+ // Update metadata to show bindless resource type.
195
+ // Do this only for legacy mode, since the resource type of the original
196
+ // kernel arg needs to be bindless for it to be reinterpreted as a bindless offset.
197
+ // In advanced mode, always keep the original kernel arg as stateless, and use the
198
+ // IMPLICIT_BUFFER_OFFSET arg for bindless access.
199
+ argInfo->type = ResourceTypeEnum::BindlessUAVResourceType;
200
+ }
201
+
181
202
if (supportDynamicBTIsAllocation)
182
203
{
183
204
argInfo->indexType =
184
205
resourceAlloc->uavsNumType +
185
- (unsigned )std::distance (m_promotedArgs .begin (), m_promotedArgs .find (srcPtr-> getArgNo () ));
206
+ (unsigned )std::distance (m_SrcPtrToAccessMap .begin (), m_SrcPtrToAccessMap .find (srcPtr));
186
207
}
187
- }
188
-
189
- if (supportDynamicBTIsAllocation)
190
- resourceAlloc->uavsNumType += m_promotedArgs.size ();
191
208
192
- for (auto inst : m_AccessToSrcPtrMap)
193
- {
194
- Instruction* accessInst = cast<Instruction>(inst.first );
195
- Argument* srcPtr = cast<Argument>(inst.second );
196
-
197
- // Get the base bindless pointer
198
- IGCIRBuilder<> builder (accessInst);
199
- Value* resourcePtr = IGC::GetBufferOperand (accessInst);
200
- IGC_ASSERT (resourcePtr);
201
- unsigned bindlessAS = IGC::EncodeAS4GFXResource (*UndefValue::get (builder.getInt32Ty ()), IGC::BINDLESS);
202
- PointerType* basePointerType = PointerType::get (IGCLLVM::getNonOpaquePtrEltTy (resourcePtr->getType ()), bindlessAS);
203
- Value* bufferOffset = builder.CreatePtrToInt (resourcePtr, builder.getInt32Ty ());
204
-
205
- Value* basePointer = nullptr ;
206
- if (!modMD->compOpt .UseLegacyBindlessMode ) {
207
- Argument * srcOffset = implicitArgs.getNumberedImplicitArg (F, ImplicitArg::BINDLESS_OFFSET, srcPtr->getArgNo ());
208
- basePointer = builder.CreateIntToPtr (srcOffset, basePointerType);
209
- } else {
210
- basePointer = builder.CreatePointerCast (srcPtr, basePointerType);
211
- }
212
-
213
- if (LoadInst * load = dyn_cast<LoadInst>(accessInst))
214
- {
215
- Value* ldraw = IGC::CreateLoadRawIntrinsic (load, cast<Instruction>(basePointer), bufferOffset);
216
- load->replaceAllUsesWith (ldraw);
217
- load->eraseFromParent ();
218
- }
219
- else if (StoreInst * store = dyn_cast<StoreInst>(accessInst))
209
+ // Loop through all access instructions for srcPtr
210
+ for (auto insts : iter.second )
220
211
{
221
- IGC::CreateStoreRawIntrinsic (store, cast<Instruction>(basePointer), bufferOffset);
222
- store->eraseFromParent ();
223
- }
224
- else if (GenIntrinsicInst * pIntr = dyn_cast<GenIntrinsicInst>(accessInst))
225
- {
226
- if (pIntr->getIntrinsicID () == GenISAIntrinsic::GenISA_simdBlockRead)
212
+ Instruction* accessInst = cast<Instruction>(insts.first );
213
+ Instruction* addrUsedInst = cast<Instruction>(insts.second );
214
+
215
+ // Modify the reference to the buffer not through all users but only in instructions
216
+ // which are used in accessing (load/store) the buffer.
217
+ Value* nullSrcPtr = ConstantPointerNull::get (cast<PointerType>(srcPtr->getType ()));
218
+ addrUsedInst->replaceUsesOfWith (srcPtr, nullSrcPtr);
219
+
220
+ // Get the base bindless pointer
221
+ IGCIRBuilder<> builder (accessInst);
222
+ Value* resourcePtr = IGC::GetBufferOperand (accessInst);
223
+ IGC_ASSERT (resourcePtr);
224
+ unsigned bindlessAS = IGC::EncodeAS4GFXResource (*UndefValue::get (builder.getInt32Ty ()), IGC::BINDLESS);
225
+ PointerType* basePointerType = PointerType::get (IGCLLVM::getNonOpaquePtrEltTy (resourcePtr->getType ()), bindlessAS);
226
+ Value* bufferOffset = builder.CreatePtrToInt (resourcePtr, builder.getInt32Ty ());
227
+
228
+ Value* basePointer = nullptr ;
229
+ if (!modMD->compOpt .UseLegacyBindlessMode ) {
230
+ Argument* srcOffset = implicitArgs.getNumberedImplicitArg (F, ImplicitArg::BINDLESS_OFFSET, srcPtr->getArgNo ());
231
+ basePointer = builder.CreateIntToPtr (srcOffset, basePointerType);
232
+ }
233
+ else {
234
+ basePointer = builder.CreatePointerCast (srcPtr, basePointerType);
235
+ }
236
+
237
+ if (LoadInst* load = dyn_cast<LoadInst>(accessInst))
227
238
{
228
- Function* newBlockReadFunc = GenISAIntrinsic::getDeclaration (F.getParent (),
229
- GenISAIntrinsic::GenISA_simdBlockReadBindless,
230
- { accessInst->getType (), basePointer->getType (),Type::getInt32Ty (accessInst->getContext ()) });
231
- Instruction* newBlockRead = CallInst::Create (newBlockReadFunc, { basePointer, bufferOffset }, " " , accessInst);
232
- newBlockRead->setDebugLoc (pIntr->getDebugLoc ());
233
- accessInst->replaceAllUsesWith (newBlockRead);
234
- accessInst->eraseFromParent ();
239
+ Value* ldraw = IGC::CreateLoadRawIntrinsic (load, cast<Instruction>(basePointer), bufferOffset);
240
+ load->replaceAllUsesWith (ldraw);
241
+ load->eraseFromParent ();
235
242
}
236
- else if (pIntr-> getIntrinsicID () == GenISAIntrinsic::GenISA_simdBlockWrite )
243
+ else if (StoreInst* store = dyn_cast<StoreInst>(accessInst) )
237
244
{
238
- Function* newBlockWriteFunc = GenISAIntrinsic::getDeclaration (F.getParent (),
239
- GenISAIntrinsic::GenISA_simdBlockWriteBindless,
240
- { basePointer->getType (), pIntr->getOperand (1 )->getType (), Type::getInt32Ty (accessInst->getContext ()) });
241
- Instruction* newBlockWrite = CallInst::Create (newBlockWriteFunc, { basePointer, pIntr->getOperand (1 ), bufferOffset }, " " , accessInst);
242
- newBlockWrite->setDebugLoc (pIntr->getDebugLoc ());
243
- accessInst->replaceAllUsesWith (newBlockWrite);
244
- accessInst->eraseFromParent ();
245
+ IGC::CreateStoreRawIntrinsic (store, cast<Instruction>(basePointer), bufferOffset);
246
+ store->eraseFromParent ();
247
+ }
248
+ else if (GenIntrinsicInst* pIntr = dyn_cast<GenIntrinsicInst>(accessInst))
249
+ {
250
+ if (pIntr->getIntrinsicID () == GenISAIntrinsic::GenISA_simdBlockRead)
251
+ {
252
+ Function* newBlockReadFunc = GenISAIntrinsic::getDeclaration (F.getParent (),
253
+ GenISAIntrinsic::GenISA_simdBlockReadBindless,
254
+ { accessInst->getType (), basePointer->getType (),Type::getInt32Ty (accessInst->getContext ()) });
255
+ Instruction* newBlockRead = CallInst::Create (newBlockReadFunc, { basePointer, bufferOffset }, " " , accessInst);
256
+ newBlockRead->setDebugLoc (pIntr->getDebugLoc ());
257
+ accessInst->replaceAllUsesWith (newBlockRead);
258
+ accessInst->eraseFromParent ();
259
+ }
260
+ else if (pIntr->getIntrinsicID () == GenISAIntrinsic::GenISA_simdBlockWrite)
261
+ {
262
+ Function* newBlockWriteFunc = GenISAIntrinsic::getDeclaration (F.getParent (),
263
+ GenISAIntrinsic::GenISA_simdBlockWriteBindless,
264
+ { basePointer->getType (), pIntr->getOperand (1 )->getType (), Type::getInt32Ty (accessInst->getContext ()) });
265
+ Instruction* newBlockWrite = CallInst::Create (newBlockWriteFunc, { basePointer, pIntr->getOperand (1 ), bufferOffset }, " " , accessInst);
266
+ newBlockWrite->setDebugLoc (pIntr->getDebugLoc ());
267
+ accessInst->replaceAllUsesWith (newBlockWrite);
268
+ accessInst->eraseFromParent ();
269
+ }
245
270
}
246
271
}
247
272
}
273
+
274
+ if (supportDynamicBTIsAllocation)
275
+ resourceAlloc->uavsNumType += m_SrcPtrToAccessMap.size ();
248
276
}
0 commit comments