@@ -117,6 +117,18 @@ static cl::opt<bool>
117
117
HoistCommon (" simplifycfg-hoist-common" , cl::Hidden, cl::init(true ),
118
118
cl::desc(" Hoist common instructions up to the parent block" ));
119
119
120
// Hidden boolean command-line flag, on by default, that gates hoisting of
// loads/stores when the target supports conditional faulting. In
// speculativelyExecuteBB it is AND-ed with
// Options.HoistLoadsStoresWithCondFaulting, so both must be set.
+ static cl::opt<bool > HoistLoadsStoresWithCondFaulting (
121
+ " simplifycfg-hoist-loads-stores-with-cond-faulting" , cl::Hidden,
122
+ cl::init (true ),
123
+ cl::desc(" Hoist loads/stores if the target supports "
124
+ " conditional faulting" ));
125
+
126
// Hidden unsigned command-line option (default 6) that caps how many
// conditional loads/stores speculativelyExecuteBB will collect for
// conversion; once the cap is reached, further loads/stores are no longer
// treated as cheap conditional-faulting candidates.
+ static cl::opt<unsigned > HoistLoadsStoresWithCondFaultingThreshold (
127
+ " hoist-loads-stores-with-cond-faulting-threshold" , cl::Hidden, cl::init(6 ),
128
+ cl::desc(" Control the maximal conditional load/store that we are willing "
129
+ " to speculatively execute to eliminate conditional branch "
130
+ " (default = 6)" ));
131
+
120
132
static cl::opt<unsigned >
121
133
HoistCommonSkipLimit (" simplifycfg-hoist-common-skip-limit" , cl::Hidden,
122
134
cl::init (20 ),
@@ -2986,6 +2998,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
2986
2998
return BIEndProb < Likely;
2987
2999
}
2988
3000
3001
/// Return true if \p I is a load or store that can be treated as a cheap
/// conditional-faulting candidate: it must be a LoadInst or StoreInst for
/// which isSimple() holds, \p TTI must report conditional load/store support
/// for its accessed type, and its alignment must be below
/// Value::MaximumAlignment. Any other instruction yields false.
+ static bool isSafeCheapLoadStore (const Instruction *I,
3002
+ const TargetTransformInfo &TTI) {
3003
+ // Volatile and atomic accesses are not handled.
3004
+ if (auto *L = dyn_cast<LoadInst>(I)) {
3005
+ if (!L->isSimple ())
3006
+ return false ;
3007
+ } else if (auto *S = dyn_cast<StoreInst>(I)) {
3008
+ if (!S->isSimple ())
3009
+ return false ;
3010
+ } else
3011
+ return false ;
3012
+
3013
+ // llvm.masked.load/store use i32 for alignment while load/store use i64.
3014
+ // That's why we have the alignment limitation.
3015
+ // FIXME: Update the prototype of the intrinsics?
3016
+ return TTI.hasConditionalLoadStoreForType (getLoadStoreType (I)) &&
3017
+ getLoadStoreAlignment (I) < Value::MaximumAlignment;
3018
+ }
3019
+
2989
3020
// / Speculate a conditional basic block flattening the CFG.
2990
3021
// /
2991
3022
// / Note that this is a very risky transform currently. Speculating
@@ -3060,6 +3091,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3060
3091
SmallVector<Instruction *, 4 > SpeculatedDbgIntrinsics;
3061
3092
3062
3093
unsigned SpeculatedInstructions = 0 ;
3094
+ bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
3095
+ Options.HoistLoadsStoresWithCondFaulting ;
3096
+ SmallVector<Instruction *, 2 > SpeculatedConditionalLoadsStores;
3063
3097
Value *SpeculatedStoreValue = nullptr ;
3064
3098
StoreInst *SpeculatedStore = nullptr ;
3065
3099
EphemeralValueTracker EphTracker;
@@ -3088,22 +3122,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3088
3122
3089
3123
// Only speculatively execute a single instruction (not counting the
3090
3124
// terminator) for now.
3091
- ++SpeculatedInstructions;
3125
+ bool IsSafeCheapLoadStore = HoistLoadsStores &&
3126
+ isSafeCheapLoadStore (&I, TTI) &&
3127
+ SpeculatedConditionalLoadsStores.size () <
3128
+ HoistLoadsStoresWithCondFaultingThreshold;
3129
+ // Do not count the load/store toward the cost if the target supports
3130
+ // conditional faulting, because it is cheap to speculate.
3131
+ if (IsSafeCheapLoadStore)
3132
+ SpeculatedConditionalLoadsStores.push_back (&I);
3133
+ else
3134
+ ++SpeculatedInstructions;
3135
+
3092
3136
if (SpeculatedInstructions > 1 )
3093
3137
return false ;
3094
3138
3095
3139
// Don't hoist the instruction if it's unsafe or expensive.
3096
- if (!isSafeToSpeculativelyExecute (&I) &&
3097
- !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore (
3098
- &I, BB, ThenBB, EndBB))))
3140
+ if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute (&I) &&
3141
+ !(HoistCondStores && !SpeculatedStoreValue &&
3142
+ (SpeculatedStoreValue =
3143
+ isSafeToSpeculateStore (&I, BB, ThenBB, EndBB))))
3099
3144
return false ;
3100
- if (!SpeculatedStoreValue &&
3145
+ if (!IsSafeCheapLoadStore && ! SpeculatedStoreValue &&
3101
3146
computeSpeculationCost (&I, TTI) >
3102
3147
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
3103
3148
return false ;
3104
3149
3105
3150
// Store the store speculation candidate.
3106
- if (SpeculatedStoreValue)
3151
+ if (!SpeculatedStore && SpeculatedStoreValue)
3107
3152
SpeculatedStore = cast<StoreInst>(&I);
3108
3153
3109
3154
// Do not hoist the instruction if any of its operands are defined but not
@@ -3130,11 +3175,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3130
3175
3131
3176
// Check that we can insert the selects and that it's not too expensive to do
3132
3177
// so.
3133
- bool Convert = SpeculatedStore != nullptr ;
3178
+ bool Convert =
3179
+ SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty ();
3134
3180
InstructionCost Cost = 0 ;
3135
3181
Convert |= validateAndCostRequiredSelects (BB, ThenBB, EndBB,
3136
- SpeculatedInstructions,
3137
- Cost, TTI);
3182
+ SpeculatedInstructions, Cost, TTI);
3138
3183
if (!Convert || Cost > Budget)
3139
3184
return false ;
3140
3185
@@ -3222,6 +3267,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
3222
3267
BB->splice (BI->getIterator (), ThenBB, ThenBB->begin (),
3223
3268
std::prev (ThenBB->end ()));
3224
3269
3270
+ // If the target supports conditional faulting,
3271
+ // we look for the following pattern:
3272
+ // \code
3273
+ // BB:
3274
+ // ...
3275
+ // %cond = icmp ult %x, %y
3276
+ // br i1 %cond, label %TrueBB, label %FalseBB
3277
+ // FalseBB:
3278
+ // store i32 1, ptr %q, align 4
3279
+ // ...
3280
+ // TrueBB:
3281
+ // %maskedloadstore = load i32, ptr %b, align 4
3282
+ // store i32 %maskedloadstore, ptr %p, align 4
3283
+ // ...
3284
+ // \endcode
3285
+ //
3286
+ // and transform it into:
3287
+ //
3288
+ // \code
3289
+ // BB:
3290
+ // ...
3291
+ // %cond = icmp ult %x, %y
3292
+ // %maskedloadstore = cload i32, ptr %b, %cond
3293
+ // cstore i32 %maskedloadstore, ptr %p, %cond
3294
+ // cstore i32 1, ptr %q, ~%cond
3295
+ // br i1 %cond, label %TrueBB, label %FalseBB
3296
+ // FalseBB:
3297
+ // ...
3298
+ // TrueBB:
3299
+ // ...
3300
+ // \endcode
3301
+ //
3302
+ // where cload/cstore are represented by llvm.masked.load/store intrinsics,
3303
+ // e.g.
3304
+ //
3305
+ // \code
3306
+ // %vcond = bitcast i1 %cond to <1 x i1>
3307
+ // %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
3308
+ // (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
3309
+ // %maskedloadstore = bitcast <1 x i32> %v0 to i32
3310
+ // call void @llvm.masked.store.v1i32.p0
3311
+ // (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
3312
+ // %cond.not = xor i1 %cond, true
3313
+ // %vcond.not = bitcast i1 %cond.not to <1 x i1>
3314
+ // call void @llvm.masked.store.v1i32.p0
3315
+ // (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
3316
+ // \endcode
3317
+ //
3318
+ // So we need to turn hoisted load/store into cload/cstore.
3319
+ auto &Context = BI->getParent ()->getContext ();
3320
+ auto *VCondTy = FixedVectorType::get (Type::getInt1Ty (Context), 1 );
3321
+ auto *Cond = BI->getOperand (0 );
3322
+ Value *Mask = nullptr ;
3323
+ // Construct the condition if needed.
3324
+ if (!SpeculatedConditionalLoadsStores.empty ()) {
3325
+ IRBuilder<> Builder (SpeculatedConditionalLoadsStores.back ());
3326
+ Mask = Builder.CreateBitCast (
3327
+ Invert ? Builder.CreateXor (Cond, ConstantInt::getTrue (Context)) : Cond,
3328
+ VCondTy);
3329
+ }
3330
+ for (auto *I : SpeculatedConditionalLoadsStores) {
3331
+ IRBuilder<> Builder (I);
3332
+ // We currently assume conditional faulting load/store is supported for
3333
+ // scalar types only when creating new instructions. This can be easily
3334
+ // extended for vector types in the future.
3335
+ assert (!getLoadStoreType (I)->isVectorTy () && " not implemented" );
3336
+ auto *Op0 = I->getOperand (0 );
3337
+ Instruction *MaskedLoadStore = nullptr ;
3338
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
3339
+ // Handle Load.
3340
+ auto *Ty = I->getType ();
3341
+ MaskedLoadStore = Builder.CreateMaskedLoad (FixedVectorType::get (Ty, 1 ),
3342
+ Op0, LI->getAlign (), Mask);
3343
+ I->replaceAllUsesWith (Builder.CreateBitCast (MaskedLoadStore, Ty));
3344
+ } else {
3345
+ // Handle Store.
3346
+ auto *StoredVal =
3347
+ Builder.CreateBitCast (Op0, FixedVectorType::get (Op0->getType (), 1 ));
3348
+ MaskedLoadStore = Builder.CreateMaskedStore (
3349
+ StoredVal, I->getOperand (1 ), cast<StoreInst>(I)->getAlign (), Mask);
3350
+ }
3351
+ // For non-debug metadata, only !annotation, !range, !nonnull and !align are
3352
+ // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
3353
+ //
3354
+ // !nonnull, !align: Pointer types are not supported, so no need to keep.
3355
+ // !range: Load type is changed from scalar to vector, but the metadata on
3356
+ // vector specifies a per-element range, so the semantics stay the
3357
+ // same. Keep it.
3358
+ // !annotation: Does not impact semantics. Keep it.
3359
+ I->dropUBImplyingAttrsAndUnknownMetadata (
3360
+ {LLVMContext::MD_range, LLVMContext::MD_annotation});
3361
+ // FIXME: DIAssignID is not supported for masked store yet.
3362
+ // (Verifier::visitDIAssignIDMetadata)
3363
+ at::deleteAssignmentMarkers (I);
3364
+ I->eraseMetadataIf ([](unsigned MDKind, MDNode *Node) {
3365
+ return Node->getMetadataID () == Metadata::DIAssignIDKind;
3366
+ });
3367
+ MaskedLoadStore->copyMetadata (*I);
3368
+ I->eraseFromParent ();
3369
+ }
3370
+
3225
3371
// Insert selects and rewrite the PHI operands.
3226
3372
IRBuilder<NoFolder> Builder (BI);
3227
3373
for (PHINode &PN : EndBB->phis ()) {
0 commit comments