@@ -15,30 +15,42 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
 
 /// Return true if coerceAvailableValueToLoadType will succeed.
 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-                                     const DataLayout &DL) {
+                                     Function *F) {
   Type *StoredTy = StoredVal->getType();
-
   if (StoredTy == LoadTy)
     return true;
 
+  const DataLayout &DL = F->getDataLayout();
+  TypeSize MinStoreSize = DL.getTypeSizeInBits(StoredTy);
+  TypeSize LoadSize = DL.getTypeSizeInBits(LoadTy);
   if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
-      DL.getTypeSizeInBits(StoredTy) == DL.getTypeSizeInBits(LoadTy))
+      MinStoreSize == LoadSize)
     return true;
 
-  // If the loaded/stored value is a first class array/struct, or scalable type,
-  // don't try to transform them. We need to be able to bitcast to integer.
-  if (isFirstClassAggregateOrScalableType(LoadTy) ||
-      isFirstClassAggregateOrScalableType(StoredTy))
+  // If the loaded/stored value is a first class array/struct, don't try to
+  // transform them. We need to be able to bitcast to integer. For scalable
+  // vectors forwarded to fixed-sized vectors @llvm.vector.extract is used.
+  if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
+    if (StoredTy->getScalarType() != LoadTy->getScalarType())
+      return false;
+
+    // If it is known at compile-time that the VScale is larger than one,
+    // use that information to allow for wider loads.
+    const auto &Attrs = F->getAttributes().getFnAttrs();
+    unsigned MinVScale = Attrs.getVScaleRangeMin();
+    MinStoreSize =
+        TypeSize::getFixed(MinStoreSize.getKnownMinValue() * MinVScale);
+  } else if (isFirstClassAggregateOrScalableType(LoadTy) ||
+             isFirstClassAggregateOrScalableType(StoredTy)) {
     return false;
-
-  uint64_t StoreSize = DL.getTypeSizeInBits(StoredTy).getFixedValue();
+  }
 
   // The store size must be byte-aligned to support future type casts.
-  if (llvm::alignTo(StoreSize, 8) != StoreSize)
+  if (llvm::alignTo(MinStoreSize, 8) != MinStoreSize)
     return false;
 
   // The store has to be at least as big as the load.
-  if (StoreSize < DL.getTypeSizeInBits(LoadTy).getFixedValue())
+  if (!TypeSize::isKnownGE(MinStoreSize, LoadSize))
     return false;
 
   bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
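
In effect, the widened check admits forwarding like the hand-written IR sketch below (the function name and types are illustrative, not from the patch): with `vscale_range(2,16)` on the function, a store of `<vscale x 4 x i32>` is known to cover at least 2 * 128 = 256 bits, so a 256-bit `<8 x i32>` load of the same element type passes the `isKnownGE` test.

    ; Hypothetical IR accepted by the widened check: the store covers at least
    ; vscale * 128 bits, and vscale_range(2,16) guarantees vscale >= 2.
    define <8 x i32> @forward(ptr %p, <vscale x 4 x i32> %v) vscale_range(2,16) {
      store <vscale x 4 x i32> %v, ptr %p
      %l = load <8 x i32>, ptr %p        ; 256 bits <= minimum store size
      ret <8 x i32> %l
    }
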
@@ -57,11 +69,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
     return false;
   }
 
-
   // The implementation below uses inttoptr for vectors of unequal size; we
   // can't allow this for non integral pointers. We could teach it to extract
   // exact subvectors if desired.
-  if (StoredNI && StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedValue())
+  if (StoredNI && (StoredTy->isScalableTy() || MinStoreSize != LoadSize))
     return false;
 
   if (StoredTy->isTargetExtTy() || LoadTy->isTargetExtTy())
@@ -77,16 +88,24 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
 ///
 /// If we can't do it, return null.
 Value *coerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
-                                      IRBuilderBase &Helper,
-                                      const DataLayout &DL) {
-  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
+                                      IRBuilderBase &Helper, Function *F) {
+  assert(canCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, F) &&
          "precondition violation - materialization can't fail");
+  const DataLayout &DL = F->getDataLayout();
   if (auto *C = dyn_cast<Constant>(StoredVal))
     StoredVal = ConstantFoldConstant(C, DL);
 
   // If this is already the right type, just return it.
   Type *StoredValTy = StoredVal->getType();
 
+  // If this is a scalable vector forwarded to a fixed vector load, create
+  // a @llvm.vector.extract instead of bitcasts.
+  if (isa<ScalableVectorType>(StoredVal->getType()) &&
+      isa<FixedVectorType>(LoadedTy)) {
+    return Helper.CreateIntrinsic(LoadedTy, Intrinsic::vector_extract,
+                                  {StoredVal, Helper.getInt64(0)});
+  }
+
   TypeSize StoredValSize = DL.getTypeSizeInBits(StoredValTy);
   TypeSize LoadedValSize = DL.getTypeSizeInBits(LoadedTy);
 
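
Concretely, this path replaces the bitcast-based coercion with a single @llvm.vector.extract call. A hand-written before/after sketch (value names are illustrative):

    ; Before: a fixed-width load fed by a scalable store of the same element type.
    store <vscale x 4 x i32> %v, ptr %p
    %l = load <4 x i32>, ptr %p

    ; After forwarding, the load's value is materialized as:
    %l = call <4 x i32> @llvm.vector.extract.v4i32.nxv4i32(<vscale x 4 x i32> %v, i64 0)
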
@@ -220,7 +239,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
   if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
     return -1;
 
-  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DepSI->getFunction()))
     return -1;
 
   Value *StorePtr = DepSI->getPointerOperand();
@@ -235,11 +254,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
 /// the other load can feed into the second load.
 int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
                                   const DataLayout &DL) {
-  // Cannot handle reading from store of first-class aggregate yet.
-  if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
+  // Cannot handle reading from store of first-class aggregate or scalable type.
+  if (isFirstClassAggregateOrScalableType(DepLI->getType()))
     return -1;
 
-  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
+  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DepLI->getFunction()))
     return -1;
 
   Value *DepPtr = DepLI->getPointerOperand();
@@ -315,6 +334,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
     return SrcVal;
   }
 
+  // For the case of a scalable vector being forwarded to a fixed-sized load,
+  // only equal element types are allowed and a @llvm.vector.extract will be
+  // used instead of bitcasts.
+  if (isa<ScalableVectorType>(SrcVal->getType()) &&
+      isa<FixedVectorType>(LoadTy)) {
+    assert(Offset == 0 &&
+           SrcVal->getType()->getScalarType() == LoadTy->getScalarType());
+    return SrcVal;
+  }
+
   uint64_t StoreSize =
       (DL.getTypeSizeInBits(SrcVal->getType()).getFixedValue() + 7) / 8;
   uint64_t LoadSize = (DL.getTypeSizeInBits(LoadTy).getFixedValue() + 7) / 8;
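
The assertion above encodes the restriction established in canCoerceMustAliasedValueToLoad: only offset zero and matching element types reach this path. A hypothetical pair like the following is rejected up front rather than tripping the assert:

    ; Not forwarded: the scalar types differ (i32 vs. float), so
    ; canCoerceMustAliasedValueToLoad already returned false.
    store <vscale x 4 x i32> %v, ptr %p
    %l = load <4 x float>, ptr %p
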
@@ -344,20 +373,24 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
 }
 
 Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
-                       Instruction *InsertPt, const DataLayout &DL) {
+                       Instruction *InsertPt, Function *F) {
+  const DataLayout &DL = F->getDataLayout();
 #ifndef NDEBUG
-  TypeSize SrcValSize = DL.getTypeStoreSize(SrcVal->getType());
+  TypeSize MinSrcValSize = DL.getTypeStoreSize(SrcVal->getType());
   TypeSize LoadSize = DL.getTypeStoreSize(LoadTy);
-  assert(SrcValSize.isScalable() == LoadSize.isScalable());
-  assert((SrcValSize.isScalable() || Offset + LoadSize <= SrcValSize) &&
+  if (MinSrcValSize.isScalable() && !LoadSize.isScalable())
+    MinSrcValSize =
+        TypeSize::getFixed(MinSrcValSize.getKnownMinValue() *
+                           F->getAttributes().getFnAttrs().getVScaleRangeMin());
+  assert((MinSrcValSize.isScalable() || Offset + LoadSize <= MinSrcValSize) &&
          "Expected Offset + LoadSize <= SrcValSize");
-  assert(
-      (!SrcValSize.isScalable() || (Offset == 0 && LoadSize == SrcValSize)) &&
-      "Expected scalable type sizes to match");
+  assert((!MinSrcValSize.isScalable() ||
+          (Offset == 0 && TypeSize::isKnownLE(LoadSize, MinSrcValSize))) &&
+         "Expected offset of zero and LoadSize <= SrcValSize");
 #endif
   IRBuilder<> Builder(InsertPt);
   SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
-  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
+  return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, F);
 }
 
 Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
@@ -408,7 +441,8 @@ Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
     ++NumBytesSet;
   }
 
-  return coerceAvailableValueToLoadType(Val, LoadTy, Builder, DL);
+  return coerceAvailableValueToLoadType(Val, LoadTy, Builder,
+                                        InsertPt->getFunction());
 }
 
   // Otherwise, this is a memcpy/memmove from a constant global.