@@ -13,32 +13,54 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
13
13
return Ty->isStructTy () || Ty->isArrayTy () || isa<ScalableVectorType>(Ty);
14
14
}
15
15
16
+ static std::optional<unsigned > getKnownVScale (Function *F) {
17
+ const auto &Attrs = F->getAttributes ().getFnAttrs ();
18
+ unsigned MinVScale = Attrs.getVScaleRangeMin ();
19
+ if (Attrs.getVScaleRangeMax () == MinVScale)
20
+ return MinVScale;
21
+ return std::nullopt;
22
+ }
23
+
16
24
/// Return true if coerceAvailableValueToLoadType will succeed.
17
25
bool canCoerceMustAliasedValueToLoad (Value *StoredVal, Type *LoadTy,
18
- const DataLayout &DL ) {
26
+ Function *F ) {
19
27
Type *StoredTy = StoredVal->getType ();
20
-
21
28
if (StoredTy == LoadTy)
22
29
return true ;
23
30
31
+ const DataLayout &DL = F->getDataLayout ();
24
32
if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
25
33
DL.getTypeSizeInBits (StoredTy) == DL.getTypeSizeInBits (LoadTy))
26
34
return true ;
27
35
28
- // If the loaded/stored value is a first class array/struct, or scalable type,
29
- // don't try to transform them. We need to be able to bitcast to integer.
30
- if (isFirstClassAggregateOrScalableType (LoadTy) ||
31
- isFirstClassAggregateOrScalableType (StoredTy))
32
- return false ;
33
-
34
- uint64_t StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
36
+ // If the loaded/stored value is a first class array/struct, don't try to
37
+ // transform them. We need to be able to bitcast to integer. For scalable
38
+ // vectors forwarded to fixed-sized vectors with a compile-time known
39
+ // vscale, @llvm.vector.extract is used.
40
+ uint64_t StoreSize, LoadSize;
41
+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
42
+ std::optional<unsigned > VScale = getKnownVScale (F);
43
+ if (!VScale || StoredTy->getScalarType () != LoadTy->getScalarType ())
44
+ return false ;
45
+
46
+ StoreSize =
47
+ DL.getTypeSizeInBits (StoredTy).getKnownMinValue () * VScale.value ();
48
+ LoadSize = DL.getTypeSizeInBits (LoadTy).getFixedValue ();
49
+ } else {
50
+ if (isFirstClassAggregateOrScalableType (LoadTy) ||
51
+ isFirstClassAggregateOrScalableType (StoredTy))
52
+ return false ;
53
+
54
+ StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
55
+ LoadSize = DL.getTypeSizeInBits (LoadTy).getFixedValue ();
56
+ }
35
57
36
58
// The store size must be byte-aligned to support future type casts.
37
59
if (llvm::alignTo (StoreSize, 8 ) != StoreSize)
38
60
return false ;
39
61
40
62
// The store has to be at least as big as the load.
41
- if (StoreSize < DL. getTypeSizeInBits (LoadTy). getFixedValue () )
63
+ if (StoreSize < LoadSize )
42
64
return false ;
43
65
44
66
bool StoredNI = DL.isNonIntegralPointerType (StoredTy->getScalarType ());
@@ -57,11 +79,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
57
79
return false ;
58
80
}
59
81
60
-
61
82
// The implementation below uses inttoptr for vectors of unequal size; we
62
83
// can't allow this for non integral pointers. We could teach it to extract
63
84
// exact subvectors if desired.
64
- if (StoredNI && StoreSize != DL. getTypeSizeInBits (LoadTy). getFixedValue () )
85
+ if (StoredNI && StoreSize != LoadSize )
65
86
return false ;
66
87
67
88
if (StoredTy->isTargetExtTy () || LoadTy->isTargetExtTy ())
@@ -79,14 +100,23 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
79
100
Value *coerceAvailableValueToLoadType (Value *StoredVal, Type *LoadedTy,
80
101
IRBuilderBase &Helper,
81
102
const DataLayout &DL) {
82
- assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, DL) &&
103
+ assert (canCoerceMustAliasedValueToLoad (
104
+ StoredVal, LoadedTy, Helper.GetInsertBlock ()->getParent ()) &&
83
105
" precondition violation - materialization can't fail" );
84
106
if (auto *C = dyn_cast<Constant>(StoredVal))
85
107
StoredVal = ConstantFoldConstant (C, DL);
86
108
87
109
// If this is already the right type, just return it.
88
110
Type *StoredValTy = StoredVal->getType ();
89
111
112
+ // If this is a scalable vector forwarded to a fixed vector load, create
113
+ // a @llvm.vector.extract instead of bitcasts.
114
+ if (isa<ScalableVectorType>(StoredVal->getType ()) &&
115
+ isa<FixedVectorType>(LoadedTy)) {
116
+ return Helper.CreateIntrinsic (LoadedTy, Intrinsic::vector_extract,
117
+ {StoredVal, Helper.getInt64 (0 )});
118
+ }
119
+
90
120
TypeSize StoredValSize = DL.getTypeSizeInBits (StoredValTy);
91
121
TypeSize LoadedValSize = DL.getTypeSizeInBits (LoadedTy);
92
122
@@ -220,7 +250,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
220
250
if (isFirstClassAggregateOrScalableType (StoredVal->getType ()))
221
251
return -1 ;
222
252
223
- if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DL ))
253
+ if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DepSI-> getFunction () ))
224
254
return -1 ;
225
255
226
256
Value *StorePtr = DepSI->getPointerOperand ();
@@ -235,11 +265,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
235
265
// / the other load can feed into the second load.
236
266
int analyzeLoadFromClobberingLoad (Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
237
267
const DataLayout &DL) {
238
- // Cannot handle reading from store of first-class aggregate yet .
239
- if (DepLI-> getType ()-> isStructTy () || DepLI->getType ()-> isArrayTy ( ))
268
+ // Cannot handle reading from store of first-class aggregate or scalable type .
269
+ if (isFirstClassAggregateOrScalableType ( DepLI->getType ()))
240
270
return -1 ;
241
271
242
- if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DL ))
272
+ if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DepLI-> getFunction () ))
243
273
return -1 ;
244
274
245
275
Value *DepPtr = DepLI->getPointerOperand ();
@@ -315,6 +345,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
315
345
return SrcVal;
316
346
}
317
347
348
+ // For the case of a scalable vector being forwarded to a fixed-sized load,
349
+ // only equal element types are allowed and a @llvm.vector.extract will be
350
+ // used instead of bitcasts.
351
+ if (isa<ScalableVectorType>(SrcVal->getType ()) &&
352
+ isa<FixedVectorType>(LoadTy)) {
353
+ assert (Offset == 0 &&
354
+ SrcVal->getType ()->getScalarType () == LoadTy->getScalarType ());
355
+ return SrcVal;
356
+ }
357
+
318
358
uint64_t StoreSize =
319
359
(DL.getTypeSizeInBits (SrcVal->getType ()).getFixedValue () + 7 ) / 8 ;
320
360
uint64_t LoadSize = (DL.getTypeSizeInBits (LoadTy).getFixedValue () + 7 ) / 8 ;
@@ -348,6 +388,10 @@ Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
348
388
#ifndef NDEBUG
349
389
TypeSize SrcValSize = DL.getTypeStoreSize (SrcVal->getType ());
350
390
TypeSize LoadSize = DL.getTypeStoreSize (LoadTy);
391
+ if (SrcValSize.isScalable () && !LoadSize.isScalable ())
392
+ SrcValSize =
393
+ TypeSize::getFixed (SrcValSize.getKnownMinValue () *
394
+ getKnownVScale (InsertPt->getFunction ()).value ());
351
395
assert (SrcValSize.isScalable () == LoadSize.isScalable ());
352
396
assert ((SrcValSize.isScalable () || Offset + LoadSize <= SrcValSize) &&
353
397
" Expected Offset + LoadSize <= SrcValSize" );
0 commit comments