@@ -660,8 +660,9 @@ class DataflowSrcSafetyAnalysis
660
660
//
661
661
// Then, a function can be split into a number of disjoint contiguous sequences
662
662
// of instructions without labels in between. These sequences can be processed
663
- // the same way basic blocks are processed by data-flow analysis, assuming
664
- // pessimistically that all registers are unsafe at the start of each sequence.
663
+ // the same way basic blocks are processed by data-flow analysis, with the same
664
+ // pessimistic estimation of the initial state at the start of each sequence
665
+ // (except the first instruction of the function).
665
666
class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
666
667
BinaryFunction &BF;
667
668
MCPlusBuilder::AllocatorIdTy AllocId;
@@ -672,6 +673,30 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
672
673
BC.MIB ->removeAnnotation (I.second , StateAnnotationIndex);
673
674
}
674
675
676
+ // / Compute a reasonably pessimistic estimation of the register state when
677
+ // / the previous instruction is not known for sure. Take the set of registers
678
+ // / which are trusted at function entry and remove all registers that can be
679
+ // / clobbered inside this function.
680
+ SrcState computePessimisticState (BinaryFunction &BF) {
681
+ BitVector ClobberedRegs (NumRegs);
682
+ for (auto &I : BF.instrs ()) {
683
+ MCInst &Inst = I.second ;
684
+ BC.MIB ->getClobberedRegs (Inst, ClobberedRegs);
685
+
686
+ // If this is a call instruction, no register is safe anymore, unless
687
+ // it is a tail call. Ignore tail calls for the purpose of estimating the
688
+ // worst-case scenario, assuming no instructions are executed in the
689
+ // caller after this point anyway.
690
+ if (BC.MIB ->isCall (Inst) && !BC.MIB ->isTailCall (Inst))
691
+ ClobberedRegs.set ();
692
+ }
693
+
694
+ SrcState S = createEntryState ();
695
+ S.SafeToDerefRegs .reset (ClobberedRegs);
696
+ S.TrustedRegs .reset (ClobberedRegs);
697
+ return S;
698
+ }
699
+
675
700
public:
676
701
CFGUnawareSrcSafetyAnalysis (BinaryFunction &BF,
677
702
MCPlusBuilder::AllocatorIdTy AllocId,
@@ -682,6 +707,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
682
707
}
683
708
684
709
void run () override {
710
+ const SrcState DefaultState = computePessimisticState (BF);
685
711
SrcState S = createEntryState ();
686
712
for (auto &I : BF.instrs ()) {
687
713
MCInst &Inst = I.second ;
@@ -696,7 +722,7 @@ class CFGUnawareSrcSafetyAnalysis : public SrcSafetyAnalysis {
696
722
LLVM_DEBUG ({
697
723
traceInst (BC, " Due to label, resetting the state before" , Inst);
698
724
});
699
- S = createUnsafeState () ;
725
+ S = DefaultState ;
700
726
}
701
727
702
728
// Check if we need to remove an old annotation (this is the case if
@@ -1231,6 +1257,83 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
1231
1257
return make_report (RetKind, Inst, *RetReg);
1232
1258
}
1233
1259
1260
+ // / While BOLT already marks some of the branch instructions as tail calls,
1261
+ // / this function tries to improve the coverage by including less obvious cases
1262
+ // / when it is possible to do without introducing too many false positives.
1263
+ static bool shouldAnalyzeTailCallInst (const BinaryContext &BC,
1264
+ const BinaryFunction &BF,
1265
+ const MCInstReference &Inst) {
1266
+ // Some BC.MIB->isXYZ(Inst) methods simply delegate to MCInstrDesc::isXYZ()
1267
+ // (such as isBranch at the time of writing this comment), some don't (such
1268
+ // as isCall). For that reason, call MCInstrDesc's methods explicitly when
1269
+ // it is important.
1270
+ const MCInstrDesc &Desc =
1271
+ BC.MII ->get (static_cast <const MCInst &>(Inst).getOpcode ());
1272
+ // Tail call should be a branch (but not necessarily an indirect one).
1273
+ if (!Desc.isBranch ())
1274
+ return false ;
1275
+
1276
+ // Always analyze the branches already marked as tail calls by BOLT.
1277
+ if (BC.MIB ->isTailCall (Inst))
1278
+ return true ;
1279
+
1280
+ // Try to also check the branches marked as "UNKNOWN CONTROL FLOW" - the
1281
+ // below is a simplified condition from BinaryContext::printInstruction.
1282
+ bool IsUnknownControlFlow =
1283
+ BC.MIB ->isIndirectBranch (Inst) && !BC.MIB ->getJumpTable (Inst);
1284
+
1285
+ if (BF.hasCFG () && IsUnknownControlFlow)
1286
+ return true ;
1287
+
1288
+ return false ;
1289
+ }
1290
+
1291
+ static std::optional<BriefReport<MCPhysReg>>
1292
+ shouldReportUnsafeTailCall (const BinaryContext &BC, const BinaryFunction &BF,
1293
+ const MCInstReference &Inst, const SrcState &S) {
1294
+ static const GadgetKind UntrustedLRKind (
1295
+ " untrusted link register found before tail call" );
1296
+
1297
+ if (!shouldAnalyzeTailCallInst (BC, BF, Inst))
1298
+ return std::nullopt;
1299
+
1300
+ // Not only the set of registers returned by getTrustedLiveInRegs() can be
1301
+ // seen as a reasonable target-independent _approximation_ of "the LR", these
1302
+ // are *exactly* those registers used by SrcSafetyAnalysis to initialize the
1303
+ // set of trusted registers on function entry.
1304
+ // Thus, this function basically checks that the precondition expected to be
1305
+ // imposed by a function call instruction (which is hardcoded into the target-
1306
+ // specific getTrustedLiveInRegs() function) is also respected on tail calls.
1307
+ SmallVector<MCPhysReg> RegsToCheck = BC.MIB ->getTrustedLiveInRegs ();
1308
+ LLVM_DEBUG ({
1309
+ traceInst (BC, " Found tail call inst" , Inst);
1310
+ traceRegMask (BC, " Trusted regs" , S.TrustedRegs );
1311
+ });
1312
+
1313
+ // In musl on AArch64, the _start function sets LR to zero and calls the next
1314
+ // stage initialization function at the end, something along these lines:
1315
+ //
1316
+ // _start:
1317
+ // mov x30, #0
1318
+ // ; ... other initialization ...
1319
+ // b _start_c ; performs "exit" system call at some point
1320
+ //
1321
+ // As this would produce a false positive for every executable linked with
1322
+ // such libc, ignore tail calls performed by ELF entry function.
1323
+ if (BC.StartFunctionAddress &&
1324
+ *BC.StartFunctionAddress == Inst.getFunction ()->getAddress ()) {
1325
+ LLVM_DEBUG ({ dbgs () << " Skipping tail call in ELF entry function.\n " ; });
1326
+ return std::nullopt;
1327
+ }
1328
+
1329
+ // Returns at most one report per instruction - this is probably OK...
1330
+ for (auto Reg : RegsToCheck)
1331
+ if (!S.TrustedRegs [Reg])
1332
+ return make_report (UntrustedLRKind, Inst, Reg);
1333
+
1334
+ return std::nullopt;
1335
+ }
1336
+
1234
1337
static std::optional<BriefReport<MCPhysReg>>
1235
1338
shouldReportCallGadget (const BinaryContext &BC, const MCInstReference &Inst,
1236
1339
const SrcState &S) {
@@ -1371,6 +1474,9 @@ void FunctionAnalysisContext::findUnsafeUses(
1371
1474
if (PacRetGadgetsOnly)
1372
1475
return ;
1373
1476
1477
+ if (auto Report = shouldReportUnsafeTailCall (BC, BF, Inst, S))
1478
+ Reports.push_back (*Report);
1479
+
1374
1480
if (auto Report = shouldReportCallGadget (BC, Inst, S))
1375
1481
Reports.push_back (*Report);
1376
1482
if (auto Report = shouldReportSigningOracle (BC, Inst, S))
0 commit comments