@@ -114,6 +114,16 @@ static cl::opt<double> CallScale(
114
114
" call-scale" ,
115
115
cl::desc (" Call score scale coefficient (when --split-strategy=cdsplit)" ),
116
116
cl::init(0.95 ), cl::ReallyHidden, cl::cat(BoltOptCategory));
117
+
118
+ static cl::opt<double >
119
+ CallPower (" call-power" ,
120
+ cl::desc (" Call score power (when --split-strategy=cdsplit)" ),
121
+ cl::init(0.05 ), cl::ReallyHidden, cl::cat(BoltOptCategory));
122
+
123
+ static cl::opt<double >
124
+ JumpPower (" jump-power" ,
125
+ cl::desc (" Jump score power (when --split-strategy=cdsplit)" ),
126
+ cl::init(0.15 ), cl::ReallyHidden, cl::cat(BoltOptCategory));
117
127
} // namespace opts
118
128
119
129
namespace {
@@ -195,6 +205,13 @@ struct SplitCacheDirected final : public SplitStrategy {
195
205
size_t Count;
196
206
};
197
207
208
/// Book-keeping record for one split-index candidate: the index itself, the
/// hot-fragment size reduction it achieves, and the two score components
/// (local score and cover-call score) used to compare candidates.
struct SplitScore {
  size_t SplitIndex;
  size_t HotSizeReduction = 0;
  double LocalScore = 0;
  double CoverCallScore = 0;
};
214
+
198
215
// Auxiliary variables used by the algorithm.
199
216
size_t TotalNumBlocks{0 };
200
217
size_t OrigHotSectionSize{0 };
@@ -340,8 +357,9 @@ struct SplitCacheDirected final : public SplitStrategy {
340
357
// We only care about new addresses of blocks in hot/warm.
341
358
if (BB->getFragmentNum () == FragmentNum::cold ())
342
359
break ;
360
+ const size_t NewSize = BB->getOutputSize ();
343
361
BB->setOutputStartAddress (CurrentAddr);
344
- CurrentAddr += BB-> getOutputSize () ;
362
+ CurrentAddr += NewSize ;
345
363
BB->setOutputEndAddress (CurrentAddr);
346
364
if (BB->getLayoutIndex () == SplitIndex) {
347
365
NewHotEndAddr = CurrentAddr;
@@ -402,13 +420,192 @@ struct SplitCacheDirected final : public SplitStrategy {
402
420
return CoverCalls;
403
421
}
404
422
423
+ // / Compute the edge score of a call edge.
424
+ double computeCallScore (uint64_t CallCount, size_t CallLength) {
425
+ // Increase call lengths by 1 to avoid raising 0 to a negative power.
426
+ return opts::CallScale * static_cast <double >(CallCount) /
427
+ std::pow (static_cast <double >(CallLength + 1 ), opts::CallPower);
428
+ }
429
+
430
+ // / Compute the edge score of a jump (branch) edge.
431
+ double computeJumpScore (uint64_t JumpCount, size_t JumpLength) {
432
+ // Increase jump lengths by 1 to avoid raising 0 to a negative power.
433
+ return static_cast <double >(JumpCount) /
434
+ std::pow (static_cast <double >(JumpLength + 1 ), opts::JumpPower);
435
+ }
436
+
437
+ // / Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex.
438
+ // / Increament Score.LocalScore in place by the sum.
439
+ void computeJumpScore (const BasicBlockOrder &BlockOrder,
440
+ const size_t SplitIndex, SplitScore &Score) {
441
+
442
+ for (const BinaryBasicBlock *SrcBB : BlockOrder) {
443
+ if (SrcBB->getKnownExecutionCount () == 0 )
444
+ continue ;
445
+
446
+ const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange ().second ;
447
+
448
+ for (const auto Pair : zip (SrcBB->successors (), SrcBB->branch_info ())) {
449
+ const BinaryBasicBlock *DstBB = std::get<0 >(Pair);
450
+ const BinaryBasicBlock::BinaryBranchInfo &Branch = std::get<1 >(Pair);
451
+ const size_t JumpCount = Branch.Count ;
452
+
453
+ if (JumpCount == 0 )
454
+ continue ;
455
+
456
+ const size_t DstBBStartAddr = DstBB->getOutputAddressRange ().first ;
457
+ const size_t NewJumpLength =
458
+ AbsoluteDifference (SrcBBEndAddr, DstBBStartAddr);
459
+ Score.LocalScore += computeJumpScore (JumpCount, NewJumpLength);
460
+ }
461
+ }
462
+ }
463
+
464
+ // / Compute sum of scores over calls originated in the current function
465
+ // / given \p SplitIndex. Increament Score.LocalScore in place by the sum.
466
+ void computeLocalCallScore (const BasicBlockOrder &BlockOrder,
467
+ const size_t SplitIndex, SplitScore &Score) {
468
+ if (opts::CallScale == 0 )
469
+ return ;
470
+
471
+ // Global index of the last block in the current function.
472
+ // This is later used to determine whether a call originated in the current
473
+ // function is to a function that comes after the current function.
474
+ const size_t LastGlobalIndex = GlobalIndices[BlockOrder.back ()];
475
+
476
+ // The length of calls originated in the input function can increase /
477
+ // decrease depending on the splitting decision.
478
+ for (const BinaryBasicBlock *SrcBB : BlockOrder) {
479
+ const size_t CallCount = SrcBB->getKnownExecutionCount ();
480
+ // If SrcBB does not call any functions, skip it.
481
+ if (CallCount == 0 )
482
+ continue ;
483
+
484
+ // Obtain an estimate on the end address of the src basic block
485
+ // after splitting at SplitIndex.
486
+ const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange ().second ;
487
+
488
+ for (const BinaryBasicBlock *DstBB : Callees[GlobalIndices[SrcBB]]) {
489
+ // Obtain an estimate on the start address of the dst basic block
490
+ // after splitting at SplitIndex. If DstBB is in a function before
491
+ // the current function, then its start address remains unchanged.
492
+ size_t DstBBStartAddr = BBOffsets[DstBB];
493
+ // If DstBB is in a function after the current function, then its
494
+ // start address should be adjusted based on the reduction in hot size.
495
+ if (GlobalIndices[DstBB] > LastGlobalIndex) {
496
+ assert (DstBBStartAddr >= Score.HotSizeReduction );
497
+ DstBBStartAddr -= Score.HotSizeReduction ;
498
+ }
499
+ const size_t NewCallLength =
500
+ AbsoluteDifference (SrcBBEndAddr, DstBBStartAddr);
501
+ Score.LocalScore += computeCallScore (CallCount, NewCallLength);
502
+ }
503
+ }
504
+ }
505
+
506
+ // / Compute sum of splitting scores for cover calls of the input function.
507
+ // / Increament Score.CoverCallScore in place by the sum.
508
+ void computeCoverCallScore (const BasicBlockOrder &BlockOrder,
509
+ const size_t SplitIndex,
510
+ const std::vector<CallInfo> &CoverCalls,
511
+ SplitScore &Score) {
512
+ if (opts::CallScale == 0 )
513
+ return ;
514
+
515
+ for (const CallInfo CI : CoverCalls) {
516
+ assert (CI.Length >= Score.HotSizeReduction &&
517
+ " Length of cover calls must exceed reduced size of hot fragment." );
518
+ // Compute the new length of the call, which is shorter than the original
519
+ // one by the size of the splitted fragment minus the total size increase.
520
+ const size_t NewCallLength = CI.Length - Score.HotSizeReduction ;
521
+ Score.CoverCallScore += computeCallScore (CI.Count , NewCallLength);
522
+ }
523
+ }
524
+
525
+ // / Compute the split score of splitting a function at a given index.
526
+ // / The split score consists of local score and cover score. Cover call score
527
+ // / is expensive to compute. As a result, we pass in a \p ReferenceScore and
528
+ // / compute cover score only when the local score exceeds that in the
529
+ // / ReferenceScore or that the size reduction of the hot fragment is larger
530
+ // / than that achieved by the split index of the ReferenceScore. This function
531
+ // / returns \p Score of SplitScore type. It contains the local score and cover
532
+ // / score (if computed) of the current splitting index. For easier book
533
+ // / keeping and comparison, it also stores the split index and the resulting
534
+ // / reduction in hot fragment size.
535
+ SplitScore computeSplitScore (const BinaryFunction &BF,
536
+ const BasicBlockOrder &BlockOrder,
537
+ const size_t SplitIndex,
538
+ const std::vector<CallInfo> &CoverCalls,
539
+ const SplitScore &ReferenceScore) {
540
+ // Populate BinaryBasicBlock::OutputAddressRange with estimated
541
+ // new start and end addresses after hot-warm splitting at SplitIndex.
542
+ size_t OldHotEnd;
543
+ size_t NewHotEnd;
544
+ std::tie (OldHotEnd, NewHotEnd) =
545
+ estimatePostSplitBBAddress (BlockOrder, SplitIndex);
546
+
547
+ SplitScore Score;
548
+ Score.SplitIndex = SplitIndex;
549
+
550
+ // It's not worth splitting if OldHotEnd < NewHotEnd.
551
+ if (OldHotEnd < NewHotEnd)
552
+ return Score;
553
+
554
+ // Hot fragment size reduction due to splitting.
555
+ Score.HotSizeReduction = OldHotEnd - NewHotEnd;
556
+
557
+ // First part of LocalScore is the sum over call edges originated in the
558
+ // input function. These edges can get shorter or longer depending on
559
+ // SplitIndex. Score.LocalScore is increamented in place.
560
+ computeLocalCallScore (BlockOrder, SplitIndex, Score);
561
+
562
+ // Second part of LocalScore is the sum over jump edges with src basic block
563
+ // and dst basic block in the current function. Score.LocalScore is
564
+ // increamented in place.
565
+ computeJumpScore (BlockOrder, SplitIndex, Score);
566
+
567
+ // There is no need to compute CoverCallScore if we have already found
568
+ // another split index with a bigger LocalScore and bigger HotSizeReduction.
569
+ if (Score.LocalScore <= ReferenceScore.LocalScore &&
570
+ Score.HotSizeReduction <= ReferenceScore.HotSizeReduction )
571
+ return Score;
572
+
573
+ // Compute CoverCallScore and store in Score in place.
574
+ computeCoverCallScore (BlockOrder, SplitIndex, CoverCalls, Score);
575
+ return Score;
576
+ }
577
+
405
578
// / Find the best index for splitting. The returned value is the index of the
406
579
// / last hot basic block. Hence, "no splitting" is equivalent to returning the
407
580
// / value which is one less than the size of the function.
408
581
size_t findSplitIndex (const BinaryFunction &BF,
409
582
const BasicBlockOrder &BlockOrder) {
410
- // Placeholder: hot-warm split after entry block.
411
- return 0 ;
583
+ // Find all function calls that can be shortened if we move blocks of the
584
+ // current function to warm/cold
585
+ const std::vector<CallInfo> CoverCalls = extractCoverCalls (BF);
586
+
587
+ // Try all possible split indices (blocks with Index <= SplitIndex are in
588
+ // hot) and find the one maximizing the splitting score.
589
+ SplitScore BestScore;
590
+ double BestScoreSum = -1.0 ;
591
+ SplitScore ReferenceScore;
592
+ for (size_t Index = 0 ; Index < BlockOrder.size (); Index++) {
593
+ const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
594
+ // No need to keep cold blocks in the hot section.
595
+ if (LastHotBB->getFragmentNum () == FragmentNum::cold ())
596
+ break ;
597
+ const SplitScore Score =
598
+ computeSplitScore (BF, BlockOrder, Index, CoverCalls, ReferenceScore);
599
+ double ScoreSum = Score.LocalScore + Score.CoverCallScore ;
600
+ if (ScoreSum > BestScoreSum) {
601
+ BestScoreSum = ScoreSum;
602
+ BestScore = Score;
603
+ }
604
+ if (Score.LocalScore > ReferenceScore.LocalScore )
605
+ ReferenceScore = Score;
606
+ }
607
+
608
+ return BestScore.SplitIndex ;
412
609
}
413
610
};
414
611
0 commit comments