@@ -163,7 +163,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
          "InsertNoopCastOfTo cannot change sizes!");
 
   // inttoptr only works for integral pointers. For non-integral pointers, we
-  // can create a GEP on null with the integral value as index. Note that
+  // can create a GEP on i8* null with the integral value as index. Note that
   // it is safe to use GEP of null instead of inttoptr here, because only
   // expressions already based on a GEP of null should be converted to pointers
   // during expansion.
@@ -173,8 +173,9 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
       auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
       assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
              "alloc size of i8 must by 1 byte for the GEP to be correct");
-      return Builder.CreateGEP(
+      auto *GEP = Builder.CreateGEP(
           Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "scevgep");
+      return Builder.CreateBitCast(GEP, Ty);
     }
   }
   // Short-circuit unnecessary bitcasts.
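With this change, InsertNoopCastOfTo no longer hands back the raw i8* GEP for a cast into a non-integral address space: the GEP on an i8* null base is bitcast back to the requested pointer type, so callers see the type they asked for. A minimal sketch of the IR this path now produces (the function name, address space 1, and i32 pointee are illustrative assumptions, not from the patch):

    define i32 addrspace(1)* @cast_example(i64 %idx) {
      ; GEP on i8* null in the non-integral address space, indexed by the
      ; integral value; legal where inttoptr is not
      %scevgep = getelementptr i8, i8 addrspace(1)* null, i64 %idx
      ; added by this patch: cast the i8* result back to the requested type
      %p = bitcast i8 addrspace(1)* %scevgep to i32 addrspace(1)*
      ret i32 addrspace(1)* %p
    }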
@@ -377,66 +378,212 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
 /// can be folded using target addressing modes.
 ///
 Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
-                                    const SCEV *const *op_end, Type *Ty,
+                                    const SCEV *const *op_end,
+                                    PointerType *PTy,
+                                    Type *Ty,
                                     Value *V) {
+  SmallVector<Value *, 4> GepIndices;
   SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
+  bool AnyNonZeroIndices = false;
 
   // Split AddRecs up into parts as either of the parts may be usable
   // without the other.
   SplitAddRecs(Ops, Ty, SE);
 
-  assert(!isa<Instruction>(V) ||
-         SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
+  Type *IntIdxTy = DL.getIndexType(PTy);
+
+  // For opaque pointers, always generate i8 GEP.
+  if (!PTy->isOpaque()) {
+    // Descend down the pointer's type and attempt to convert the other
+    // operands into GEP indices, at each level. The first index in a GEP
+    // indexes into the array implied by the pointer operand; the rest of
+    // the indices index into the element or field type selected by the
+    // preceding index.
+    Type *ElTy = PTy->getNonOpaquePointerElementType();
+    for (;;) {
+      // If the scale size is not 0, attempt to factor out a scale for
+      // array indexing.
+      SmallVector<const SCEV *, 8> ScaledOps;
+      if (ElTy->isSized()) {
+        const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
+        if (!ElSize->isZero()) {
+          SmallVector<const SCEV *, 8> NewOps;
+          for (const SCEV *Op : Ops) {
+            const SCEV *Remainder = SE.getConstant(Ty, 0);
+            if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
+              // Op now has ElSize factored out.
+              ScaledOps.push_back(Op);
+              if (!Remainder->isZero())
+                NewOps.push_back(Remainder);
+              AnyNonZeroIndices = true;
+            } else {
+              // The operand was not divisible, so add it to the list of
+              // operands we'll scan next iteration.
+              NewOps.push_back(Op);
+            }
+          }
+          // If we made any changes, update Ops.
+          if (!ScaledOps.empty()) {
+            Ops = NewOps;
+            SimplifyAddOperands(Ops, Ty, SE);
+          }
+        }
+      }
 
-  // Expand the operands for a plain byte offset.
-  Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty);
+      // Record the scaled array index for this level of the type. If
+      // we didn't find any operands that could be factored, tentatively
+      // assume that element zero was selected (since the zero offset
+      // would obviously be folded away).
+      Value *Scaled =
+          ScaledOps.empty()
+              ? Constant::getNullValue(Ty)
+              : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty);
+      GepIndices.push_back(Scaled);
+
+      // Collect struct field index operands.
+      while (StructType *STy = dyn_cast<StructType>(ElTy)) {
+        bool FoundFieldNo = false;
+        // An empty struct has no fields.
+        if (STy->getNumElements() == 0) break;
+        // Field offsets are known. See if a constant offset falls within any of
+        // the struct fields.
+        if (Ops.empty())
+          break;
+        assert(
+            !STy->containsScalableVectorType() &&
+            "GEPs are not supported on structures containing scalable vectors");
+        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
+          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
+            const StructLayout &SL = *DL.getStructLayout(STy);
+            uint64_t FullOffset = C->getValue()->getZExtValue();
+            if (FullOffset < SL.getSizeInBytes()) {
+              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
+              GepIndices.push_back(
+                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
+              ElTy = STy->getTypeAtIndex(ElIdx);
+              Ops[0] =
+                  SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
+              AnyNonZeroIndices = true;
+              FoundFieldNo = true;
+            }
+          }
+        // If no struct field offsets were found, tentatively assume that
+        // field zero was selected (since the zero offset would obviously
+        // be folded away).
+        if (!FoundFieldNo) {
+          ElTy = STy->getTypeAtIndex(0u);
+          GepIndices.push_back(
+              Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
+        }
+      }
 
-  // Fold a GEP with constant operands.
-  if (Constant *CLHS = dyn_cast<Constant>(V))
-    if (Constant *CRHS = dyn_cast<Constant>(Idx))
-      return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
+      if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
+        ElTy = ATy->getElementType();
+      else
+        // FIXME: Handle VectorType.
+        // E.g., If ElTy is scalable vector, then ElSize is not a compile-time
+        // constant, therefore can not be factored out. The generated IR is less
+        // ideal with base 'V' cast to i8* and do ugly getelementptr over that.
+        break;
+    }
+  }
 
-  // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
-  unsigned ScanLimit = 6;
-  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
-  // Scanning starts from the last instruction before the insertion point.
-  BasicBlock::iterator IP = Builder.GetInsertPoint();
-  if (IP != BlockBegin) {
-    --IP;
-    for (; ScanLimit; --IP, --ScanLimit) {
-      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
-      // generated code.
-      if (isa<DbgInfoIntrinsic>(IP))
-        ScanLimit++;
-      if (IP->getOpcode() == Instruction::GetElementPtr &&
-          IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
-          cast<GEPOperator>(&*IP)->getSourceElementType() ==
-              Type::getInt8Ty(Ty->getContext()))
-        return &*IP;
-      if (IP == BlockBegin) break;
+  // If none of the operands were convertible to proper GEP indices, cast
+  // the base to i8* and do an ugly getelementptr with that. It's still
+  // better than ptrtoint+arithmetic+inttoptr at least.
+  if (!AnyNonZeroIndices) {
+    // Cast the base to i8*.
+    if (!PTy->isOpaque())
+      V = InsertNoopCastOfTo(V,
+          Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
+
+    assert(!isa<Instruction>(V) ||
+           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
+
+    // Expand the operands for a plain byte offset.
+    Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty);
+
+    // Fold a GEP with constant operands.
+    if (Constant *CLHS = dyn_cast<Constant>(V))
+      if (Constant *CRHS = dyn_cast<Constant>(Idx))
+        return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
+
+    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+    unsigned ScanLimit = 6;
+    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+    // Scanning starts from the last instruction before the insertion point.
+    BasicBlock::iterator IP = Builder.GetInsertPoint();
+    if (IP != BlockBegin) {
+      --IP;
+      for (; ScanLimit; --IP, --ScanLimit) {
+        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+        // generated code.
+        if (isa<DbgInfoIntrinsic>(IP))
+          ScanLimit++;
+        if (IP->getOpcode() == Instruction::GetElementPtr &&
+            IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
+            cast<GEPOperator>(&*IP)->getSourceElementType() ==
+                Type::getInt8Ty(Ty->getContext()))
+          return &*IP;
+        if (IP == BlockBegin) break;
+      }
+    }
+
+    // Save the original insertion point so we can restore it when we're done.
+    SCEVInsertPointGuard Guard(Builder, this);
+
+    // Move the insertion point out of as many loops as we can.
+    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+      BasicBlock *Preheader = L->getLoopPreheader();
+      if (!Preheader) break;
+
+      // Ok, move up a level.
+      Builder.SetInsertPoint(Preheader->getTerminator());
     }
+
+    // Emit a GEP.
+    return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep");
   }
 
-  // Save the original insertion point so we can restore it when we're done.
-  SCEVInsertPointGuard Guard(Builder, this);
+  {
+    SCEVInsertPointGuard Guard(Builder, this);
 
-  // Move the insertion point out of as many loops as we can.
-  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
-    if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
-    BasicBlock *Preheader = L->getLoopPreheader();
-    if (!Preheader) break;
+    // Move the insertion point out of as many loops as we can.
+    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+      if (!L->isLoopInvariant(V)) break;
 
-    // Ok, move up a level.
-    Builder.SetInsertPoint(Preheader->getTerminator());
+      bool AnyIndexNotLoopInvariant = any_of(
+          GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
+
+      if (AnyIndexNotLoopInvariant)
+        break;
+
+      BasicBlock *Preheader = L->getLoopPreheader();
+      if (!Preheader) break;
+
+      // Ok, move up a level.
+      Builder.SetInsertPoint(Preheader->getTerminator());
+    }
+
+    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
+    // because ScalarEvolution may have changed the address arithmetic to
+    // compute a value which is beyond the end of the allocated object.
+    Value *Casted = V;
+    if (V->getType() != PTy)
+      Casted = InsertNoopCastOfTo(Casted, PTy);
+    Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
+                                   Casted, GepIndices, "scevgep");
+    Ops.push_back(SE.getUnknown(GEP));
   }
 
-  // Emit a GEP.
-  return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep");
+  return expand(SE.getAddExpr(Ops));
 }
 
-Value *SCEVExpander::expandAddToGEP(const SCEV *Op, Type *Ty, Value *V) {
+Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
+                                    Value *V) {
   const SCEV *const Ops[1] = {Op};
-  return expandAddToGEP(Ops, Ops + 1, Ty, V);
+  return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
 }
 
 /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
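The hunk above restores the typed-pointer expansion: instead of always emitting an i8 GEP over a byte offset, expandAddToGEP descends the pointee type, factors the element size out of each operand with FactorOutConstant, and turns constant offsets into struct field indices, falling back to the i8* form only when no operand converts. A hedged sketch of the difference (the types, function names, and SCEV %base + 8*%i + 4 are illustrative assumptions):

    %pair = type { i32, i32 }

    ; typed path: 8*%i is factored by the 8-byte struct size, and the
    ; remaining constant 4 falls inside field 1, so no i8* arithmetic
    ; is needed
    define i32* @typed(%pair* %base, i64 %i) {
      %scevgep = getelementptr %pair, %pair* %base, i64 %i, i32 1
      ret i32* %scevgep
    }

    ; fallback path (AnyNonZeroIndices == false): cast the base to i8*
    ; and add the raw byte offset
    define i8* @fallback(%pair* %base, i64 %n) {
      %v = bitcast %pair* %base to i8*
      %scevgep = getelementptr i8, i8* %v, i64 %n
      ret i8* %scevgep
    }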
@@ -562,7 +709,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
     }
 
     assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
-    if (isa<PointerType>(Sum->getType())) {
+    if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
@@ -575,7 +722,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
           X = SE.getSCEV(U->getValue());
         NewOps.push_back(X);
       }
-      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), Ty, Sum);
+      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
     } else if (Op->isNonConstantNegative()) {
       // Instead of doing a negate and add, just do a subtract.
       Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
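When the running sum is a pointer, the remaining add operands are now handed to expandAddToGEP with the pointer's type, so they can become element-scaled GEP indices rather than raw byte arithmetic. For example (a sketch with assumed names and types, not from the patch), a sum %p + 4*%i with %p of type i32* can expand to a single scaled index:

    define i32* @sum_as_gep(i32* %p, i64 %i) {
      ; 4*%i is divided by the 4-byte i32 element size to form the index
      %scevgep = getelementptr i32, i32* %p, i64 %i
      ret i32* %scevgep
    }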
@@ -885,7 +1032,15 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
   Value *IncV;
   // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
   if (ExpandTy->isPointerTy()) {
-    IncV = expandAddToGEP(SE.getSCEV(StepV), IntTy, PN);
+    PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
+    // If the step isn't constant, don't use an implicitly scaled GEP, because
+    // that would require a multiply inside the loop.
+    if (!isa<ConstantInt>(StepV))
+      GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
+                                  GEPPtrTy->getAddressSpace());
+    IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
+    if (IncV->getType() != PN->getType())
+      IncV = Builder.CreateBitCast(IncV, PN->getType());
   } else {
     IncV = useSubtract ?
       Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
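The i1* trick in this hunk: a GEP implicitly multiplies each index by the element's alloc size, which is free for a constant step but would materialize a multiply inside the loop for a variable step. Rewriting the pointer as i1* (alloc size 1) makes the step a plain byte offset, and the result is bitcast back to the PHI's type. A sketch under those assumptions (names are illustrative):

    define i32* @iv_inc_var_step(i32* %iv, i64 %step) {
      ; i1 has alloc size 1, so %step is used as a raw byte offset
      %raw = bitcast i32* %iv to i1*
      %inc = getelementptr i1, i1* %raw, i64 %step
      %iv.next = bitcast i1* %inc to i32*
      ret i32* %iv.next
    }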
@@ -1285,12 +1440,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
 
   // Re-apply any non-loop-dominating offset.
   if (PostLoopOffset) {
-    if (isa<PointerType>(ExpandTy)) {
+    if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
       if (Result->getType()->isIntegerTy()) {
         Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
-        Result = expandAddToGEP(SE.getUnknown(Result), IntTy, Base);
+        Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
       } else {
-        Result = expandAddToGEP(PostLoopOffset, IntTy, Result);
+        Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
       }
     } else {
       Result = InsertNoopCastOfTo(Result, IntTy);
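Re-applying the post-loop offset also goes through the typed GEP path now: when the expanded type is a pointer, the offset becomes a GEP index over PTy's pointee (and symmetrically when Result is the integer and Base the pointer). A sketch with assumed types, not from the patch:

    define i32* @post_loop(i32* %result) {
      ; a constant post-loop offset of 8 bytes over an i32 pointee
      ; becomes element index 2
      %scevgep = getelementptr i32, i32* %result, i64 2
      ret i32* %scevgep
    }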
@@ -1344,9 +1499,10 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
 
   // {X,+,F} --> X + {0,+,F}
   if (!S->getStart()->isZero()) {
-    if (isa<PointerType>(S->getType())) {
+    if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
       Value *StartV = expand(SE.getPointerBase(S));
-      return expandAddToGEP(SE.removePointerBase(S), Ty, StartV);
+      assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
+      return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
     }
 
     SmallVector<const SCEV *, 4> NewOps(S->operands());
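Here a pointer-typed addrec {X,+,F} is split into its pointer base X, expanded as the GEP base (the new assert checks it already carries the expected pointer type), and the remaining integer addrec {0,+,F}, which expandAddToGEP turns into the index. A worked sketch with assumed names: for {%p,+,4}<%loop> with %p of type i32*, the {0,+,4} byte offset, factored by the 4-byte element size, becomes the index already held in an IV:

    ; %iv is assumed to be the expansion of {0,+,1}<%loop>
    define i32* @addrec_elem(i32* %p, i64 %iv) {
      %scevgep = getelementptr i32, i32* %p, i64 %iv
      ret i32* %scevgep
    }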