@@ -25,6 +25,168 @@ using namespace PatternMatch;

 #define DEBUG_TYPE "instcombine"

+#ifndef INTEL_SYCL_OPAQUEPOINTER_READY
+/// Analyze 'Val', seeing if it is a simple linear expression.
+/// If so, decompose it, returning some value X, such that Val is
+/// X*Scale+Offset.
+///
+static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
+                                        uint64_t &Offset) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
+    Offset = CI->getZExtValue();
+    Scale = 0;
+    return ConstantInt::get(Val->getType(), 0);
+  }
+
+  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+    // Cannot look past anything that might overflow.
+    // We specifically require nuw because we store the Scale in an unsigned
+    // and perform an unsigned divide on it.
+    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+    if (OBI && !OBI->hasNoUnsignedWrap()) {
+      Scale = 1;
+      Offset = 0;
+      return Val;
+    }
+
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      if (I->getOpcode() == Instruction::Shl) {
+        // This is a value scaled by '1 << the shift amt'.
+        Scale = UINT64_C(1) << RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      }
+
+      if (I->getOpcode() == Instruction::Mul) {
+        // This value is scaled by 'RHS'.
+        Scale = RHS->getZExtValue();
+        Offset = 0;
+        return I->getOperand(0);
+      }
+
+      if (I->getOpcode() == Instruction::Add) {
+        // We have X+C.  Check to see if we really have (X*C2)+C1,
+        // where C1 is divisible by C2.
+        unsigned SubScale;
+        Value *SubVal =
+            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
+        Offset += RHS->getZExtValue();
+        Scale = SubScale;
+        return SubVal;
+      }
+    }
+  }
+
+  // Otherwise, we can't look past this.
+  Scale = 1;
+  Offset = 0;
+  return Val;
+}
+
+/// If we find a cast of an allocation instruction, try to eliminate the cast by
+/// moving the type information into the alloc.
+Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
+                                                       AllocaInst &AI) {
+  PointerType *PTy = cast<PointerType>(CI.getType());
+  // Opaque pointers don't have an element type we could replace with.
+  if (PTy->isOpaque())
+    return nullptr;
+
+  IRBuilderBase::InsertPointGuard Guard(Builder);
+  Builder.SetInsertPoint(&AI);
+
+  // Get the type really allocated and the type casted to.
+  Type *AllocElTy = AI.getAllocatedType();
+  Type *CastElTy = PTy->getNonOpaquePointerElementType();
+  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
+
+  // This optimisation does not work for cases where the cast type
+  // is scalable and the allocated type is not. This is because we need to
+  // know how many times the casted type fits into the allocated type.
+  // For the opposite case where the allocated type is scalable and the
+  // cast type is not, this leads to poor code quality due to the
+  // introduction of 'vscale' into the calculations. It seems better to
+  // bail out for this case too until we've done a proper cost-benefit
+  // analysis.
+  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
+  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
+  if (AllocIsScalable != CastIsScalable) return nullptr;
+
+  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
+  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
+  if (CastElTyAlign < AllocElTyAlign) return nullptr;
+
+  // If the allocation has multiple uses, only promote it if we are strictly
+  // increasing the alignment of the resultant allocation.  If we keep it the
+  // same, we open the door to infinite loops of various kinds.
+  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
+
+  // The alloc and cast types should be either both fixed or both scalable.
+  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
+  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
+  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
+
+  // If the allocation has multiple uses, only promote it if we're not
+  // shrinking the amount of memory being allocated.
+  uint64_t AllocElTyStoreSize =
+      DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
+  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
+  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
+
+  // See if we can satisfy the modulus by pulling a scale out of the array
+  // size argument.
+  unsigned ArraySizeScale;
+  uint64_t ArrayOffset;
+  Value *NumElements = // See if the array size is a decomposable linear expr.
+      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
+
+  // If we can now satisfy the modulus by using a non-1 scale, we really can
+  // do the xform.
+  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
+      (AllocElTySize*ArrayOffset) % CastElTySize != 0) return nullptr;
+
+  // We don't currently support arrays of scalable types.
+  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
+
+  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
+  Value *Amt = nullptr;
+  if (Scale == 1) {
+    Amt = NumElements;
+  } else {
+    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
+    // Insert before the alloca, not before the cast.
+    Amt = Builder.CreateMul(Amt, NumElements);
+  }
+
+  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
+                                  Offset, true);
+    Amt = Builder.CreateAdd(Amt, Off);
+  }
+
+  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
+  New->setAlignment(AI.getAlign());
+  New->takeName(&AI);
+  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
+  New->setMetadata(LLVMContext::MD_DIAssignID,
+                   AI.getMetadata(LLVMContext::MD_DIAssignID));
+
+  replaceAllDbgUsesWith(AI, *New, *New, DT);
+
+  // If the allocation has multiple real uses, insert a cast and change all
+  // things that used it to use the new cast.  This will also hack on CI, but it
+  // will die soon.
+  if (!AI.hasOneUse()) {
+    // New is the allocation instruction, pointer typed. AI is the original
+    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
+    replaceInstUsesWith(AI, NewCast);
+    eraseInstFromFunction(AI);
+  }
+  return replaceInstUsesWith(CI, New);
+}
+#endif // INTEL_SYCL_OPAQUEPOINTER_READY
+
 /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
 /// true for, actually insert the code to evaluate the expression.
 Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
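For context on what the restored code does, here is an illustrative sketch; it is not taken from the patch or its tests, the value names and types are hypothetical, and it assumes typed (non-opaque) pointers, which is exactly what the INTEL_SYCL_OPAQUEPOINTER_READY guard preserves. When an alloca's only use is a bitcast to a same-sized, compatibly aligned element type, PromoteCastOfAllocation retypes the allocation so the cast becomes dead:

  ; Before: the alloca is only reached through the bitcast.
  %buf = alloca [2 x i32], align 4
  %p = bitcast [2 x i32]* %buf to i64*

  ; After: 8 bytes of [2 x i32] hold exactly one i64, so the size arithmetic
  ; from decomposeSimpleLinearExpr yields a count of one element. Uses of %p
  ; are pointed at the retyped alloca and the bitcast then dies.
  %buf = alloca i64, align 4

The second hunk below wires this into visitBitCast, so the rewrite only runs on pointer-to-pointer bitcasts and only while the tree still builds without opaque pointers.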
@@ -2634,9 +2796,18 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
     return replaceInstUsesWith(CI, Src);

 #ifndef INTEL_SYCL_OPAQUEPOINTER_READY
-  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy))
+  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
+    // If we are casting an alloca to a pointer to a type of the same
+    // size, rewrite the allocation instruction to allocate the "right" type.
+    // There is no need to modify malloc calls because it is their bitcast that
+    // needs to be cleaned up.
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
+      if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
+        return V;
+
     if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
       return I;
+  }
 #endif // INTEL_SYCL_OPAQUEPOINTER_READY

   if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {