@@ -13,32 +13,52 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
13
13
return Ty->isStructTy () || Ty->isArrayTy () || isa<ScalableVectorType>(Ty);
14
14
}
15
15
16
+ static std::optional<unsigned > getKnownVScale (Function *F) {
17
+ const auto &Attrs = F->getAttributes ().getFnAttrs ();
18
+ unsigned MinVScale = Attrs.getVScaleRangeMin ();
19
+ if (Attrs.getVScaleRangeMax () == MinVScale)
20
+ return MinVScale;
21
+ return std::nullopt;
22
+ }
23
+
16
24
// / Return true if coerceAvailableValueToLoadType will succeed.
17
25
bool canCoerceMustAliasedValueToLoad (Value *StoredVal, Type *LoadTy,
18
- const DataLayout &DL ) {
26
+ Function *F ) {
19
27
Type *StoredTy = StoredVal->getType ();
20
-
21
28
if (StoredTy == LoadTy)
22
29
return true ;
23
30
31
+ const DataLayout &DL = F->getDataLayout ();
32
+ TypeSize StoreSize = DL.getTypeSizeInBits (StoredTy);
33
+ TypeSize LoadSize = DL.getTypeSizeInBits (LoadTy);
24
34
if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
25
- DL. getTypeSizeInBits (StoredTy) == DL. getTypeSizeInBits (LoadTy) )
35
+ StoreSize == LoadSize )
26
36
return true ;
27
37
28
- // If the loaded/stored value is a first class array/struct, or scalable type,
29
- // don't try to transform them. We need to be able to bitcast to integer.
30
- if (isFirstClassAggregateOrScalableType (LoadTy) ||
31
- isFirstClassAggregateOrScalableType (StoredTy))
38
+ // If the loaded/stored value is a first class array/struct, don't try to
39
+ // transform them. We need to be able to bitcast to integer. For scalable
40
+ // vectors forwarded to fixed-sized vectors @llvm.vector.extract is used.
41
+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
42
+ if (StoredTy->getScalarType () != LoadTy->getScalarType ())
43
+ return false ;
44
+
45
+ // If the VScale is known at compile-time, use that information to
46
+ // allow for wider loads.
47
+ std::optional<unsigned > VScale = getKnownVScale (F);
48
+ if (VScale)
49
+ StoreSize =
50
+ TypeSize::getFixed (StoreSize.getKnownMinValue () * VScale.value ());
51
+ } else if (isFirstClassAggregateOrScalableType (LoadTy) ||
52
+ isFirstClassAggregateOrScalableType (StoredTy)) {
32
53
return false ;
33
-
34
- uint64_t StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
54
+ }
35
55
36
56
// The store size must be byte-aligned to support future type casts.
37
57
if (llvm::alignTo (StoreSize, 8 ) != StoreSize)
38
58
return false ;
39
59
40
60
// The store has to be at least as big as the load.
41
- if (StoreSize < DL. getTypeSizeInBits (LoadTy). getFixedValue ( ))
61
+ if (! TypeSize::isKnownGE ( StoreSize, LoadSize ))
42
62
return false ;
43
63
44
64
bool StoredNI = DL.isNonIntegralPointerType (StoredTy->getScalarType ());
@@ -57,11 +77,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
57
77
return false ;
58
78
}
59
79
60
-
61
80
// The implementation below uses inttoptr for vectors of unequal size; we
62
81
// can't allow this for non integral pointers. We could teach it to extract
63
82
// exact subvectors if desired.
64
- if (StoredNI && StoreSize != DL. getTypeSizeInBits (LoadTy). getFixedValue () )
83
+ if (StoredNI && StoreSize != LoadSize )
65
84
return false ;
66
85
67
86
if (StoredTy->isTargetExtTy () || LoadTy->isTargetExtTy ())
@@ -77,16 +96,24 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
77
96
// /
78
97
// / If we can't do it, return null.
79
98
Value *coerceAvailableValueToLoadType (Value *StoredVal, Type *LoadedTy,
80
- IRBuilderBase &Helper,
81
- const DataLayout &DL) {
82
- assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, DL) &&
99
+ IRBuilderBase &Helper, Function *F) {
100
+ assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, F) &&
83
101
" precondition violation - materialization can't fail" );
102
+ const DataLayout &DL = F->getDataLayout ();
84
103
if (auto *C = dyn_cast<Constant>(StoredVal))
85
104
StoredVal = ConstantFoldConstant (C, DL);
86
105
87
106
// If this is already the right type, just return it.
88
107
Type *StoredValTy = StoredVal->getType ();
89
108
109
+ // If this is a scalable vector forwarded to a fixed vector load, create
110
+ // a @llvm.vector.extract instead of bitcasts.
111
+ if (isa<ScalableVectorType>(StoredVal->getType ()) &&
112
+ isa<FixedVectorType>(LoadedTy)) {
113
+ return Helper.CreateIntrinsic (LoadedTy, Intrinsic::vector_extract,
114
+ {StoredVal, Helper.getInt64 (0 )});
115
+ }
116
+
90
117
TypeSize StoredValSize = DL.getTypeSizeInBits (StoredValTy);
91
118
TypeSize LoadedValSize = DL.getTypeSizeInBits (LoadedTy);
92
119
@@ -220,7 +247,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
220
247
if (isFirstClassAggregateOrScalableType (StoredVal->getType ()))
221
248
return -1 ;
222
249
223
- if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DL ))
250
+ if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DepSI-> getFunction () ))
224
251
return -1 ;
225
252
226
253
Value *StorePtr = DepSI->getPointerOperand ();
@@ -235,11 +262,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
235
262
// / the other load can feed into the second load.
236
263
int analyzeLoadFromClobberingLoad (Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
237
264
const DataLayout &DL) {
238
- // Cannot handle reading from store of first-class aggregate yet .
239
- if (DepLI-> getType ()-> isStructTy () || DepLI->getType ()-> isArrayTy ( ))
265
+ // Cannot handle reading from store of first-class aggregate or scalable type .
266
+ if (isFirstClassAggregateOrScalableType ( DepLI->getType ()))
240
267
return -1 ;
241
268
242
- if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DL ))
269
+ if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DepLI-> getFunction () ))
243
270
return -1 ;
244
271
245
272
Value *DepPtr = DepLI->getPointerOperand ();
@@ -315,6 +342,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
315
342
return SrcVal;
316
343
}
317
344
345
+ // For the case of a scalable vector being forwarded to a fixed-sized load,
346
+ // only equal element types are allowed and a @llvm.vector.extract will be
347
+ // used instead of bitcasts.
348
+ if (isa<ScalableVectorType>(SrcVal->getType ()) &&
349
+ isa<FixedVectorType>(LoadTy)) {
350
+ assert (Offset == 0 &&
351
+ SrcVal->getType ()->getScalarType () == LoadTy->getScalarType ());
352
+ return SrcVal;
353
+ }
354
+
318
355
uint64_t StoreSize =
319
356
(DL.getTypeSizeInBits (SrcVal->getType ()).getFixedValue () + 7 ) / 8 ;
320
357
uint64_t LoadSize = (DL.getTypeSizeInBits (LoadTy).getFixedValue () + 7 ) / 8 ;
@@ -344,20 +381,24 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
344
381
}
345
382
346
383
/// Materialize the value of \p SrcVal at byte offset \p Offset as a value of
/// type \p LoadTy, inserting any required instructions before \p InsertPt.
///
/// \p F is the function containing the load; its DataLayout (and, when the
/// vscale_range attribute pins it, its vscale) drive the size reasoning. The
/// debug-only assertions verify that the requested slice fits inside the
/// source value; for a scalable source forwarded to a fixed-size load, a
/// compile-time-known vscale converts the scalable size to a fixed one so
/// the range check can be performed.
Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
                       Instruction *InsertPt, Function *F) {
  const DataLayout &DL = F->getDataLayout();
#ifndef NDEBUG
  TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
  TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
  // Consistency fix: query vscale from F (the function this routine already
  // uses for the DataLayout and the final coercion) instead of re-deriving
  // the same function via InsertPt->getFunction().
  if (auto VScale = getKnownVScale(F);
      VScale && SrcValSize.isScalable() && !LoadSize.isScalable())
    SrcValSize =
        TypeSize::getFixed(SrcValSize.getKnownMinValue() * VScale.value());
  assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
         "Expected Offset + LoadSize <= SrcValSize");
  assert((!SrcValSize.isScalable() ||
          (Offset == 0 && TypeSize::isKnownLE(LoadSize, SrcValSize))) &&
         "Expected offset of zero and LoadSize <= SrcValSize");
#endif
  IRBuilder<> Builder(InsertPt);
  SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, F);
}
362
403
363
404
Constant *getConstantValueForLoad (Constant *SrcVal, unsigned Offset,
@@ -408,7 +449,8 @@ Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
408
449
++NumBytesSet;
409
450
}
410
451
411
- return coerceAvailableValueToLoadType (Val, LoadTy, Builder, DL);
452
+ return coerceAvailableValueToLoadType (Val, LoadTy, Builder,
453
+ InsertPt->getFunction ());
412
454
}
413
455
414
456
// Otherwise, this is a memcpy/memmove from a constant global.
0 commit comments