Skip to content

Commit e9b2e16

Browse files
authored
[BPF] introduce __attribute__((bpf_fastcall)) (#101228)
This commit introduces attribute bpf_fastcall to declare BPF functions that do not clobber some of the caller saved registers (R0-R5). The idea is to generate the code complying with generic BPF ABI, but allow compatible Linux Kernel to remove unnecessary spills and fills of non-scratched registers (given some compiler assistance). For such functions do register allocation as-if caller saved registers are not clobbered, but later wrap the calls with spill and fill patterns that are simple to recognize in kernel. For example for the following C code: #define __bpf_fastcall __attribute__((bpf_fastcall)) void bar(void) __bpf_fastcall; void buz(long i, long j, long k); void foo(long i, long j, long k) { bar(); buz(i, j, k); } First allocate registers as if: foo: call bar # note: no spills for i,j,k (r1,r2,r3) call buz exit And later insert spills and fills in the peephole phase: foo: *(u64 *)(r10 - 8) = r1; # Such call pattern is *(u64 *)(r10 - 16) = r2; # correct when used with *(u64 *)(r10 - 24) = r3; # old kernels. call bar r3 = *(u64 *)(r10 - 24); # But also allows new r2 = *(u64 *)(r10 - 16); # kernels to recognize the r1 = *(u64 *)(r10 - 8); # pattern and remove spills/fills. call buz exit The offsets for generated spills/fills are picked as minimal stack offsets for the function. Allocated stack slots are not used for any other purposes, in order to simplify in-kernel analysis. Corresponding functionality has been merged in Linux Kernel as [this](https://lore.kernel.org/bpf/172179364482.1919.9590705031832457529.git-patchwork-notify@kernel.org/) patch set (the patch assumed that `no_caller_saved_registers` attribute would be used by LLVM, naming does not matter for the Kernel).
1 parent a3fea06 commit e9b2e16

16 files changed

+488
-3
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2200,6 +2200,15 @@ def BTFTypeTag : TypeAttr {
22002200
let LangOpts = [COnly];
22012201
}
22022202

2203+
def BPFFastCall : InheritableAttr,
2204+
TargetSpecificAttr<TargetBPF> {
2205+
let Spellings = [Clang<"bpf_fastcall">];
2206+
let Subjects = SubjectList<[FunctionLike]>;
2207+
let Documentation = [BPFFastCallDocs];
2208+
let LangOpts = [COnly];
2209+
let SimpleHandler = 1;
2210+
}
2211+
22032212
def WebAssemblyExportName : InheritableAttr,
22042213
TargetSpecificAttr<TargetWebAssembly> {
22052214
let Spellings = [Clang<"export_name">];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,6 +2345,25 @@ section.
23452345
}];
23462346
}
23472347

2348+
def BPFFastCallDocs : Documentation {
2349+
let Category = DocCatType;
2350+
let Content = [{
2351+
Functions annotated with this attribute are likely to be inlined by BPF JIT.
2352+
It is assumed that the inlined implementation uses fewer caller-saved registers
2353+
than a regular function.
2354+
Specifically, the following registers are likely to be preserved:
2355+
- ``R0`` if function return value is ``void``;
2356+
- ``R2-R5`` if function takes 1 argument;
2357+
- ``R3-R5`` if function takes 2 arguments;
2358+
- ``R4-R5`` if function takes 3 arguments;
2359+
- ``R5`` if function takes 4 arguments;
2360+
2361+
For such functions Clang generates code pattern that allows BPF JIT
2362+
to recognize and remove unnecessary spills and fills of the preserved
2363+
registers.
2364+
}];
2365+
}
2366+
23482367
def MipsInterruptDocs : Documentation {
23492368
let Category = DocCatFunction;
23502369
let Heading = "interrupt (MIPS)";

clang/lib/CodeGen/CGCall.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2422,6 +2422,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
24222422
FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
24232423
if (TargetDecl->hasAttr<LeafAttr>())
24242424
FuncAttrs.addAttribute(llvm::Attribute::NoCallback);
2425+
if (TargetDecl->hasAttr<BPFFastCallAttr>())
2426+
FuncAttrs.addAttribute("bpf_fastcall");
24252427

24262428
HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
24272429
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// REQUIRES: bpf-registered-target
2+
// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
3+
4+
#define __bpf_fastcall __attribute__((bpf_fastcall))
5+
6+
void test(void) __bpf_fastcall;
7+
void (*ptr)(void) __bpf_fastcall;
8+
9+
void foo(void) {
10+
test();
11+
(*ptr)();
12+
}
13+
14+
// CHECK: @ptr = global ptr null
15+
// CHECK: define {{.*}} void @foo()
16+
// CHECK: entry:
17+
// CHECK: call void @test() #[[call_attr:[0-9]+]]
18+
// CHECK: %[[ptr:.*]] = load ptr, ptr @ptr, align 8
19+
// CHECK: call void %[[ptr]]() #[[call_attr]]
20+
// CHECK: ret void
21+
22+
// CHECK: declare void @test() #[[func_attr:[0-9]+]]
23+
// CHECK: attributes #[[func_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
24+
// CHECK: attributes #[[call_attr]] = { "bpf_fastcall" }

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
// CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
2323
// CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
2424
// CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias)
25+
// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType)
2526
// CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
2627
// CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record)
2728
// CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// REQUIRES: bpf-registered-target
2+
// RUN: %clang_cc1 %s -triple bpf -verify
3+
4+
__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}}
5+
6+
__attribute__((bpf_fastcall)) void func();
7+
__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}}
8+
9+
void (*ptr1)(void) __attribute__((bpf_fastcall));
10+
void (*ptr2)(void);
11+
void foo(void) {
12+
ptr2 = ptr1; // not an error
13+
ptr1 = ptr2; // not an error
14+
}

llvm/lib/Target/BPF/BPFCallingConv.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[
4646
]>;
4747

4848
def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>;
49+
def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>;

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments(
402402

403403
const size_t BPFTargetLowering::MaxArgs = 5;
404404

405+
static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
406+
MCRegister Reg) {
407+
for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
408+
RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
409+
}
410+
411+
static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
412+
MachineFunction &MF,
413+
const uint32_t *BaseRegMask) {
414+
uint32_t *RegMask = MF.allocateRegMask();
415+
unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
416+
memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
417+
return RegMask;
418+
}
419+
405420
SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
406421
SmallVectorImpl<SDValue> &InVals) const {
407422
SelectionDAG &DAG = CLI.DAG;
@@ -513,6 +528,22 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
513528
for (auto &Reg : RegsToPass)
514529
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
515530

531+
bool HasFastCall =
532+
(CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
533+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
534+
if (HasFastCall) {
535+
uint32_t *RegMask = regMaskFromTemplate(
536+
TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
537+
for (auto const &RegPair : RegsToPass)
538+
resetRegMaskBit(TRI, RegMask, RegPair.first);
539+
if (!CLI.CB->getType()->isVoidTy())
540+
resetRegMaskBit(TRI, RegMask, BPF::R0);
541+
Ops.push_back(DAG.getRegisterMask(RegMask));
542+
} else {
543+
Ops.push_back(
544+
DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
545+
}
546+
516547
if (InGlue.getNode())
517548
Ops.push_back(InGlue);
518549

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
677677
}
678678

679679
// Jump and link
680-
let isCall=1, hasDelaySlot=0, Uses = [R11],
681-
// Potentially clobbered registers
682-
Defs = [R0, R1, R2, R3, R4, R5] in {
680+
let isCall=1, hasDelaySlot=0, Uses = [R11] in {
683681
def JAL : CALL<"call">;
684682
def JALX : CALLX<"callx">;
685683
}

llvm/lib/Target/BPF/BPFMIPeephole.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
#include "BPFInstrInfo.h"
2525
#include "BPFTargetMachine.h"
2626
#include "llvm/ADT/Statistic.h"
27+
#include "llvm/CodeGen/LivePhysRegs.h"
28+
#include "llvm/CodeGen/MachineFrameInfo.h"
2729
#include "llvm/CodeGen/MachineFunctionPass.h"
2830
#include "llvm/CodeGen/MachineInstrBuilder.h"
2931
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
319321
bool in16BitRange(int Num);
320322
bool eliminateRedundantMov();
321323
bool adjustBranch();
324+
bool insertMissingCallerSavedSpills();
322325

323326
public:
324327

@@ -333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
333336
Changed = eliminateRedundantMov();
334337
if (SupportGotol)
335338
Changed = adjustBranch() || Changed;
339+
Changed |= insertMissingCallerSavedSpills();
336340
return Changed;
337341
}
338342
};
@@ -596,6 +600,88 @@ bool BPFMIPreEmitPeephole::adjustBranch() {
596600
return Changed;
597601
}
598602

603+
static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
604+
BPF::R3, BPF::R4, BPF::R5};
605+
606+
struct BPFFastCall {
607+
MachineInstr *MI;
608+
unsigned LiveCallerSavedRegs;
609+
};
610+
611+
static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
612+
LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
613+
SmallVectorImpl<BPFFastCall> &Calls) {
614+
LiveRegs.init(*TRI);
615+
LiveRegs.addLiveOuts(BB);
616+
Calls.clear();
617+
for (MachineInstr &MI : llvm::reverse(BB)) {
618+
if (MI.isCall()) {
619+
unsigned LiveCallerSavedRegs = 0;
620+
for (MCRegister R : CallerSavedRegs) {
621+
bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
622+
if (!DoSpillFill)
623+
continue;
624+
LiveCallerSavedRegs |= 1 << R;
625+
}
626+
if (LiveCallerSavedRegs)
627+
Calls.push_back({&MI, LiveCallerSavedRegs});
628+
}
629+
LiveRegs.stepBackward(MI);
630+
}
631+
}
632+
633+
static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
634+
unsigned SlotSize) {
635+
int64_t MinFixedObjOffset = 0;
636+
// Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
637+
for (int I = MFI.getObjectIndexBegin(); I < MFI.getObjectIndexEnd(); ++I) {
638+
if (MFI.isDeadObjectIndex(I))
639+
continue;
640+
MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
641+
}
642+
MinFixedObjOffset -=
643+
(SlotSize + MinFixedObjOffset % SlotSize) & (SlotSize - 1);
644+
return MinFixedObjOffset;
645+
}
646+
647+
bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
648+
MachineFrameInfo &MFI = MF->getFrameInfo();
649+
SmallVector<BPFFastCall, 8> Calls;
650+
LivePhysRegs LiveRegs;
651+
const unsigned SlotSize = 8;
652+
int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
653+
bool Changed = false;
654+
for (MachineBasicBlock &BB : *MF) {
655+
collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
656+
Changed |= !Calls.empty();
657+
for (BPFFastCall &Call : Calls) {
658+
int64_t CurOffset = MinFixedObjOffset;
659+
for (MCRegister Reg : CallerSavedRegs) {
660+
if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
661+
continue;
662+
// Allocate stack object
663+
CurOffset -= SlotSize;
664+
MFI.CreateFixedSpillStackObject(SlotSize, CurOffset);
665+
// Generate spill
666+
BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(),
667+
TII->get(BPF::STD))
668+
.addReg(Reg)
669+
.addReg(BPF::R10)
670+
.addImm(CurOffset)
671+
.addImm(0);
672+
// Generate fill
673+
BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(),
674+
TII->get(BPF::LDD))
675+
.addReg(Reg)
676+
.addReg(BPF::R10)
677+
.addImm(CurOffset)
678+
.addImm(0);
679+
}
680+
}
681+
}
682+
return Changed;
683+
}
684+
599685
} // end default namespace
600686

601687
INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",

llvm/lib/Target/BPF/BPFRegisterInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
4040
return CSR_SaveList;
4141
}
4242

43+
const uint32_t *
44+
BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
45+
CallingConv::ID CC) const {
46+
switch (CC) {
47+
default:
48+
return CSR_RegMask;
49+
case CallingConv::PreserveAll:
50+
return CSR_PreserveAll_RegMask;
51+
}
52+
}
53+
4354
BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
4455
BitVector Reserved(getNumRegs());
4556
markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer

llvm/lib/Target/BPF/BPFRegisterInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {
2626

2727
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
2828

29+
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
30+
CallingConv::ID) const override;
31+
2932
BitVector getReservedRegs(const MachineFunction &MF) const override;
3033

3134
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s
2+
3+
; Generated from the following C code:
4+
;
5+
; #define __bpf_fastcall __attribute__((bpf_fastcall))
6+
;
7+
; void bar(void) __bpf_fastcall;
8+
; void buz(long i, long j, long k);
9+
;
10+
; void foo(long i, long j, long k) {
11+
; bar();
12+
; buz(i, j, k);
13+
; }
14+
;
15+
; Using the following command:
16+
;
17+
; clang --target=bpf -emit-llvm -O2 -S -o - t.c
18+
;
19+
; (unnecessary attrs removed manually)
20+
21+
; Check that function marked with bpf_fastcall does not clobber R1-R5.
22+
23+
define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
24+
entry:
25+
tail call void @bar() #1
26+
tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k)
27+
ret void
28+
}
29+
30+
; CHECK: foo:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: *(u64 *)(r10 - 8) = r1
33+
; CHECK-NEXT: *(u64 *)(r10 - 16) = r2
34+
; CHECK-NEXT: *(u64 *)(r10 - 24) = r3
35+
; CHECK-NEXT: call bar
36+
; CHECK-NEXT: r3 = *(u64 *)(r10 - 24)
37+
; CHECK-NEXT: r2 = *(u64 *)(r10 - 16)
38+
; CHECK-NEXT: r1 = *(u64 *)(r10 - 8)
39+
; CHECK-NEXT: call buz
40+
; CHECK-NEXT: exit
41+
42+
declare dso_local void @bar() #0
43+
declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef)
44+
45+
attributes #0 = { "bpf_fastcall" }
46+
attributes #1 = { nounwind "bpf_fastcall" }

0 commit comments

Comments
 (0)