@@ -234,6 +234,99 @@ Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
234
234
// set(Def, Extract, Instance);
235
235
return Extract;
236
236
}
237
+
238
+ Value *VPTransformState::get (VPValue *Def, unsigned Part) {
239
+ // If Values have been set for this Def return the one relevant for \p Part.
240
+ if (hasVectorValue (Def, Part))
241
+ return Data.PerPartOutput [Def][Part];
242
+
243
+ auto GetBroadcastInstrs = [this , Def](Value *V) {
244
+ bool SafeToHoist = Def->isDefinedOutsideVectorRegions ();
245
+ if (VF.isScalar ())
246
+ return V;
247
+ // Place the code for broadcasting invariant variables in the new preheader.
248
+ IRBuilder<>::InsertPointGuard Guard (Builder);
249
+ if (SafeToHoist) {
250
+ BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB [cast<VPBasicBlock>(
251
+ Plan->getVectorLoopRegion ()->getSinglePredecessor ())];
252
+ if (LoopVectorPreHeader)
253
+ Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
254
+ }
255
+
256
+ // Place the code for broadcasting invariant variables in the new preheader.
257
+ // Broadcast the scalar into all locations in the vector.
258
+ Value *Shuf = Builder.CreateVectorSplat (VF, V, " broadcast" );
259
+
260
+ return Shuf;
261
+ };
262
+
263
+ if (!hasScalarValue (Def, {Part, 0 })) {
264
+ assert (Def->isLiveIn () && " expected a live-in" );
265
+ if (Part != 0 )
266
+ return get (Def, 0 );
267
+ Value *IRV = Def->getLiveInIRValue ();
268
+ Value *B = GetBroadcastInstrs (IRV);
269
+ set (Def, B, Part);
270
+ return B;
271
+ }
272
+
273
+ Value *ScalarValue = get (Def, {Part, 0 });
274
+ // If we aren't vectorizing, we can just copy the scalar map values over
275
+ // to the vector map.
276
+ if (VF.isScalar ()) {
277
+ set (Def, ScalarValue, Part);
278
+ return ScalarValue;
279
+ }
280
+
281
+ bool IsUniform = vputils::isUniformAfterVectorization (Def);
282
+
283
+ unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue () - 1 ;
284
+ // Check if there is a scalar value for the selected lane.
285
+ if (!hasScalarValue (Def, {Part, LastLane})) {
286
+ // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
287
+ // VPExpandSCEVRecipes can also be uniform.
288
+ assert ((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe ()) ||
289
+ isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe ()) ||
290
+ isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe ())) &&
291
+ " unexpected recipe found to be invariant" );
292
+ IsUniform = true ;
293
+ LastLane = 0 ;
294
+ }
295
+
296
+ auto *LastInst = cast<Instruction>(get (Def, {Part, LastLane}));
297
+ // Set the insert point after the last scalarized instruction or after the
298
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
299
+ // will directly follow the scalar definitions.
300
+ auto OldIP = Builder.saveIP ();
301
+ auto NewIP =
302
+ isa<PHINode>(LastInst)
303
+ ? BasicBlock::iterator (LastInst->getParent ()->getFirstNonPHI ())
304
+ : std::next (BasicBlock::iterator (LastInst));
305
+ Builder.SetInsertPoint (&*NewIP);
306
+
307
+ // However, if we are vectorizing, we need to construct the vector values.
308
+ // If the value is known to be uniform after vectorization, we can just
309
+ // broadcast the scalar value corresponding to lane zero for each unroll
310
+ // iteration. Otherwise, we construct the vector values using
311
+ // insertelement instructions. Since the resulting vectors are stored in
312
+ // State, we will only generate the insertelements once.
313
+ Value *VectorValue = nullptr ;
314
+ if (IsUniform) {
315
+ VectorValue = GetBroadcastInstrs (ScalarValue);
316
+ set (Def, VectorValue, Part);
317
+ } else {
318
+ // Initialize packing with insertelements to start from undef.
319
+ assert (!VF.isScalable () && " VF is assumed to be non scalable." );
320
+ Value *Undef = PoisonValue::get (VectorType::get (LastInst->getType (), VF));
321
+ set (Def, Undef, Part);
322
+ for (unsigned Lane = 0 ; Lane < VF.getKnownMinValue (); ++Lane)
323
+ packScalarIntoVectorValue (Def, {Part, Lane});
324
+ VectorValue = get (Def, Part);
325
+ }
326
+ Builder.restoreIP (OldIP);
327
+ return VectorValue;
328
+ }
329
+
237
330
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor (VPRecipeBase *R) {
238
331
VPRegionBlock *LoopRegion = R->getParent ()->getEnclosingLoopRegion ();
239
332
return VPBB2IRBB[LoopRegion->getPreheaderVPBB ()];
0 commit comments