@@ -554,17 +554,7 @@ bool MemOpt::mergeLoad(LoadInst *LeadingLoad,
554
554
= dyn_cast<SCEVConstant>(SE->getMinusSCEV (NextPtr, LeadingPtr));
555
555
// Skip load with non-constant distance.
556
556
if (!Offset) {
557
- unsigned AS = LeadingLoad->getPointerAddressSpace ();
558
- // FIXME: So far, for OCL, only apply SymbolicPtr to local memory which
559
- // has only 32 (in fact 16) significant bits.
560
- // In case the pointer arithmetic is already broken down, as SCEV won't work
561
- // always apply SymbolicPtr.
562
- if (CGC->type == ShaderType::OPENCL_SHADER) {
563
- if ((AS != ADDRESS_SPACE_LOCAL) &&
564
- ((AS < ADDRESS_SPACE_NUM_ADDRESSES)))
565
- continue ;
566
- }
567
-
557
+
568
558
SymbolicPointer LeadingSymPtr;
569
559
SymbolicPointer NextSymPtr;
570
560
if (SymbolicPointer::decomposePointer (LeadingLoad->getPointerOperand (),
@@ -888,16 +878,6 @@ bool MemOpt::mergeStore(StoreInst *LeadingStore,
888
878
= dyn_cast<SCEVConstant>(SE->getMinusSCEV (NextPtr, LeadingPtr));
889
879
// Skip store with non-constant distance.
890
880
if (!Offset) {
891
- unsigned AS = LeadingStore->getPointerAddressSpace ();
892
- // FIXME: So far, for OCL, only apply SymbolicPtr to local memory which
893
- // has only 32 (in fact 16) significant bits.
894
- // In case the pointer arithmetic is already broken down, as SCEV won't work
895
- // always apply SymbolicPtr.
896
- if (CGC->type == ShaderType::OPENCL_SHADER) {
897
- if ((AS != ADDRESS_SPACE_LOCAL) &&
898
- ((AS < ADDRESS_SPACE_NUM_ADDRESSES)))
899
- continue ;
900
- }
901
881
902
882
SymbolicPointer LeadingSymPtr;
903
883
SymbolicPointer NextSymPtr;
@@ -1535,49 +1515,66 @@ SymbolicPointer::decomposePointer(const Value *Ptr, SymbolicPointer &SymPtr,
1535
1515
continue ;
1536
1516
}
1537
1517
1538
- uint64_t Scale = DL->getTypeAllocSize (GTI.getIndexedType ());
1539
- ExtensionKind Extension = EK_NotExtended;
1540
-
1541
- // If the integer type is smaller than the pointer size, it is implicitly
1542
- // sign extended to pointer size.
1543
- unsigned Width = Index->getType ()->getIntegerBitWidth ();
1544
- if (ptrSize > Width)
1545
- Extension = EK_SignExt;
1546
-
1547
- // Use getLinearExpression to decompose the index into a C1*V+C2 form.
1548
- APInt IndexScale (Width, 0 ), IndexOffset (Width, 0 );
1549
- Index = getLinearExpression (Index, IndexScale, IndexOffset, Extension,
1550
- 0U , DL);
1551
-
1552
- // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
1553
- // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
1554
- SymPtr.Offset += IndexOffset.getSExtValue () * Scale;
1555
- Scale *= IndexScale.getSExtValue ();
1556
-
1557
- SymbolicIndex Idx (Index, Extension);
1558
-
1559
- // If we already had an occurrence of this index variable, merge this
1560
- // scale into it. For example, we want to handle:
1561
- // A[x][x] -> x*16 + x*4 -> x*20
1562
- // This also ensures that 'x' only appears in the index list once.
1563
- for (unsigned i = 0 , e = SymPtr.Terms .size (); i != e; ++i) {
1564
- if (SymPtr.Terms [i].Idx == Idx) {
1565
- Scale += SymPtr.Terms [i].Scale ;
1566
- SymPtr.Terms .erase (SymPtr.Terms .begin ()+i);
1567
- break ;
1568
- }
1518
+ // In some cases the GEP might have indices that don't directly have a base offset;
1519
+ // so split an add whose second operand is non-constant into its two operands.
1520
+ std::vector<Value*> terms = {Index};
1521
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(Index))
1522
+ {
1523
+ if (!(dyn_cast<ConstantInt>(BOp->getOperand (1 ))) &&
1524
+ BOp->getOpcode () == Instruction::Add)
1525
+ {
1526
+ terms.clear ();
1527
+ terms.push_back (BOp->getOperand (0 ));
1528
+ terms.push_back (BOp->getOperand (1 ));
1529
+ }
1569
1530
}
1570
1531
1571
- // Make sure that we have a scale that makes sense for this target's
1572
- // pointer size.
1573
- if (unsigned ShiftBits = 64 - ptrSize) {
1574
- Scale <<= ShiftBits;
1575
- Scale = (int64_t )Scale >> ShiftBits;
1576
- }
1532
+ for (auto Ind : terms)
1533
+ {
1534
+ uint64_t Scale = DL->getTypeAllocSize (GTI.getIndexedType ());
1535
+ ExtensionKind Extension = EK_NotExtended;
1536
+
1537
+ // If the integer type is smaller than the pointer size, it is implicitly
1538
+ // sign extended to pointer size.
1539
+ unsigned Width = Index->getType ()->getIntegerBitWidth ();
1540
+ if (ptrSize > Width)
1541
+ Extension = EK_SignExt;
1542
+
1543
+ // Use getLinearExpression to decompose the index into a C1*V+C2 form.
1544
+ APInt IndexScale (Width, 0 ), IndexOffset (Width, 0 );
1545
+ Value* new_Ind = getLinearExpression (Ind, IndexScale, IndexOffset, Extension,
1546
+ 0U , DL);
1547
+
1548
+ // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
1549
+ // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
1550
+ SymPtr.Offset += IndexOffset.getSExtValue () * Scale;
1551
+ Scale *= IndexScale.getSExtValue ();
1552
+
1553
+ SymbolicIndex Idx (new_Ind, Extension);
1554
+
1555
+ // If we already had an occurrence of this index variable, merge this
1556
+ // scale into it. For example, we want to handle:
1557
+ // A[x][x] -> x*16 + x*4 -> x*20
1558
+ // This also ensures that 'x' only appears in the index list once.
1559
+ for (unsigned i = 0 , e = SymPtr.Terms .size (); i != e; ++i) {
1560
+ if (SymPtr.Terms [i].Idx == Idx) {
1561
+ Scale += SymPtr.Terms [i].Scale ;
1562
+ SymPtr.Terms .erase (SymPtr.Terms .begin () + i);
1563
+ break ;
1564
+ }
1565
+ }
1566
+
1567
+ // Make sure that we have a scale that makes sense for this target's
1568
+ // pointer size.
1569
+ if (unsigned ShiftBits = 64 - ptrSize) {
1570
+ Scale <<= ShiftBits;
1571
+ Scale = (int64_t )Scale >> ShiftBits;
1572
+ }
1577
1573
1578
- if (Scale) {
1579
- Term Entry = {Idx, int64_t (Scale)};
1580
- SymPtr.Terms .push_back (Entry);
1574
+ if (Scale) {
1575
+ Term Entry = { Idx, int64_t (Scale) };
1576
+ SymPtr.Terms .push_back (Entry);
1577
+ }
1581
1578
}
1582
1579
}
1583
1580
0 commit comments