@@ -191,17 +191,19 @@ Value *GenXLoadStoreLegalization::splitMemoryOperation(Value *InsertTo,
191
191
192
192
for (; Index + SplitWidth <= ExecSize; Index += SplitWidth) {
193
193
SmallVector<Value *, 13 > Args;
194
- llvm::transform (CI.args (), std::back_inserter (Args), [&](Value *Arg) {
195
- auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType ());
196
- if (!VTy)
197
- return Arg;
198
- auto NumElements = VTy->getNumElements ();
199
- bool IsSOA = NumElements == VectorSize * ExecSize;
200
- if (NumElements != ExecSize && !IsSOA)
201
- return Arg;
202
- return createExtractFromSOAValue (&CI, Arg, IsSOA ? VectorSize : 1 , Index,
203
- SplitWidth);
204
- });
194
+ std::transform (CI.arg_begin (), CI.arg_end (), Func->arg_begin (),
195
+ std::back_inserter (Args), [&](Value *Arg, auto &NewArg) {
196
+ auto *VTy =
197
+ dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType ());
198
+ auto *NewTy = NewArg.getType ();
199
+ if (!VTy || VTy == NewTy)
200
+ return Arg;
201
+ auto NumElements = VTy->getNumElements ();
202
+ bool IsSOA = NumElements == VectorSize * ExecSize;
203
+ IGC_ASSERT (NumElements == ExecSize || IsSOA);
204
+ return createExtractFromSOAValue (
205
+ &CI, Arg, IsSOA ? VectorSize : 1 , Index, SplitWidth);
206
+ });
205
207
206
208
auto *NewCI = Builder.CreateCall (Func, Args);
207
209
LLVM_DEBUG (dbgs () << " Created split: " << *NewCI << " \n " );
@@ -231,56 +233,61 @@ Value *GenXLoadStoreLegalization::extendMemoryOperation(Value *InsertTo,
231
233
IRBuilder<> Builder (&CI);
232
234
233
235
SmallVector<Value *, 13 > Args;
234
- llvm::transform (CI.args (), std::back_inserter (Args), [&](Value *Arg) -> Value* {
235
- auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType ());
236
- if (!VTy)
237
- return Arg;
238
-
239
- auto NumElements = VTy->getNumElements ();
240
- bool IsSOA = NumElements == VectorSize * ExecSize;
241
- if (NumElements != ExecSize && !IsSOA)
242
- return Arg;
243
-
244
- if (Index > 0 )
245
- Arg = createExtractFromSOAValue (&CI, Arg, IsSOA ? VectorSize : 1 , Index,
246
- RestSize);
247
- if (RestSize == ExtendWidth)
248
- return Arg;
249
-
250
- auto *ETy = VTy->getElementType ();
251
- auto *InsTy = IGCLLVM::FixedVectorType::get (
252
- ETy, IsSOA ? VectorSize * ExtendWidth : ExtendWidth);
253
- if (!ETy->isIntegerTy (1 ))
254
- return createInsertToSOAValue (&CI, UndefValue::get (InsTy), Arg,
255
- IsSOA ? VectorSize : 1 , 0 , RestSize);
256
- if (RestSize == 4 || RestSize == 8 || RestSize == 16 || !isa<CmpInst>(Arg))
257
- return createInsertToSOAValue (&CI, Constant::getNullValue (InsTy), Arg,
258
- IsSOA ? VectorSize : 1 , 0 , RestSize);
259
-
260
- // If a predicate is illegally sized, it will cause problems later in
261
- // GenXLegalization pass because wrpredregion must follow offset alignment
262
- // restrictions. In case when the illegal predicate is result of cmp
263
- // instruction we can also extend this instruction instead of just
264
- // writing its result into a legal-sized predicate.
265
- auto *Cmp = cast<CmpInst>(Arg);
266
- auto *OpVTy = cast<IGCLLVM::FixedVectorType>(Cmp->getOperand (0 )->getType ());
267
- auto *NewOpTy = IGCLLVM::FixedVectorType::get (
268
- OpVTy->getElementType (),
269
- IsSOA ? VectorSize * ExtendWidth : ExtendWidth);
270
- auto *NewOp0 = createInsertToSOAValue (&CI, Constant::getNullValue (NewOpTy),
271
- Cmp->getOperand (0 ),
272
- IsSOA ? VectorSize : 1 , 0 , RestSize);
273
- auto *NewOp1 = createInsertToSOAValue (
274
- &CI,
275
- Cmp->isTrueWhenEqual () ? Constant::getAllOnesValue (NewOpTy)
276
- : Constant::getNullValue (NewOpTy),
277
- Cmp->getOperand (1 ), IsSOA ? VectorSize : 1 , 0 , RestSize);
278
- auto *NewCmp =
279
- CmpInst::Create (Cmp->getOpcode (), Cmp->getPredicate (), NewOp0, NewOp1,
280
- Cmp->getName () + " .extended" , &CI);
281
- NewCmp->setDebugLoc (Cmp->getDebugLoc ());
282
- return NewCmp;
283
- });
236
+ std::transform (
237
+ CI.arg_begin (), CI.arg_end (), Func->arg_begin (), std::back_inserter (Args),
238
+ [&](Value *Arg, auto &NewArg) -> Value * {
239
+ auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType ());
240
+ auto *NewTy = NewArg.getType ();
241
+ if (!VTy || VTy == NewTy)
242
+ return Arg;
243
+
244
+ auto NumElements = VTy->getNumElements ();
245
+ bool IsSOA = NumElements == VectorSize * ExecSize;
246
+ if (NumElements != ExecSize && !IsSOA)
247
+ return Arg;
248
+
249
+ if (Index > 0 )
250
+ Arg = createExtractFromSOAValue (&CI, Arg, IsSOA ? VectorSize : 1 ,
251
+ Index, RestSize);
252
+ if (RestSize == ExtendWidth)
253
+ return Arg;
254
+
255
+ auto *ETy = VTy->getElementType ();
256
+ auto *InsTy = IGCLLVM::FixedVectorType::get (
257
+ ETy, IsSOA ? VectorSize * ExtendWidth : ExtendWidth);
258
+ if (!ETy->isIntegerTy (1 ))
259
+ return createInsertToSOAValue (&CI, UndefValue::get (InsTy), Arg,
260
+ IsSOA ? VectorSize : 1 , 0 , RestSize);
261
+ if (RestSize == 4 || RestSize == 8 || RestSize == 16 ||
262
+ !isa<CmpInst>(Arg))
263
+ return createInsertToSOAValue (&CI, Constant::getNullValue (InsTy), Arg,
264
+ IsSOA ? VectorSize : 1 , 0 , RestSize);
265
+
266
+ // If a predicate is illegally sized, it will cause problems later in
267
+ // GenXLegalization pass because wrpredregion must follow offset
268
+ // alignment restrictions. In case when the illegal predicate is result
269
+ // of cmp instruction we can also extend this instruction instead of
270
+ // just writing its result into a legal-sized predicate.
271
+ auto *Cmp = cast<CmpInst>(Arg);
272
+ auto *OpVTy =
273
+ cast<IGCLLVM::FixedVectorType>(Cmp->getOperand (0 )->getType ());
274
+ auto *NewOpTy = IGCLLVM::FixedVectorType::get (
275
+ OpVTy->getElementType (),
276
+ IsSOA ? VectorSize * ExtendWidth : ExtendWidth);
277
+ auto *NewOp0 = createInsertToSOAValue (
278
+ &CI, Constant::getNullValue (NewOpTy), Cmp->getOperand (0 ),
279
+ IsSOA ? VectorSize : 1 , 0 , RestSize);
280
+ auto *NewOp1 = createInsertToSOAValue (
281
+ &CI,
282
+ Cmp->isTrueWhenEqual () ? Constant::getAllOnesValue (NewOpTy)
283
+ : Constant::getNullValue (NewOpTy),
284
+ Cmp->getOperand (1 ), IsSOA ? VectorSize : 1 , 0 , RestSize);
285
+ auto *NewCmp =
286
+ CmpInst::Create (Cmp->getOpcode (), Cmp->getPredicate (), NewOp0,
287
+ NewOp1, Cmp->getName () + " .extended" , &CI);
288
+ NewCmp->setDebugLoc (Cmp->getDebugLoc ());
289
+ return NewCmp;
290
+ });
284
291
285
292
Value *Res = Builder.CreateCall (Func, Args);
286
293
LLVM_DEBUG (dbgs () << " Created extend: " << *Res << " \n " );
@@ -316,13 +323,14 @@ GenXLoadStoreLegalization::getMemoryIntrinsic(CallInst &CI,
316
323
OverloadedTypes.push_back (VTy);
317
324
}
318
325
326
+ const auto CacheControlIndex = vc::InternalIntrinsic::getMemoryCacheControlOperandIndex (&CI);
319
327
for (unsigned I = 0 ; I < CI.getNumOperands (); I++) {
320
328
if (!vc::InternalIntrinsic::isOverloadedArg (IID, I))
321
329
continue ;
322
330
323
331
auto *Arg = CI.getOperand (I);
324
332
auto *ArgTy = Arg->getType ();
325
- if (!isa<IGCLLVM::FixedVectorType>(ArgTy)) {
333
+ if (!isa<IGCLLVM::FixedVectorType>(ArgTy) || CacheControlIndex == I ) {
326
334
OverloadedTypes.push_back (ArgTy);
327
335
continue ;
328
336
}
0 commit comments