Skip to content

[BPF] introduce __attribute__((bpf_fastcall)) #101228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Aug 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -2189,6 +2189,15 @@ def BTFTypeTag : TypeAttr {
let LangOpts = [COnly];
}

// The `bpf_fastcall` attribute: specific to the BPF target, accepted only
// when compiling C (LangOpts = [COnly]) and applicable to function-like
// subjects. It declares no arguments and is marked SimpleHandler, so no
// hand-written handler is listed for it here.
def BPFFastCall : InheritableAttr,
TargetSpecificAttr<TargetBPF> {
let Spellings = [Clang<"bpf_fastcall">];
let Subjects = SubjectList<[FunctionLike]>;
let Documentation = [BPFFastCallDocs];
let LangOpts = [COnly];
let SimpleHandler = 1;
}

def WebAssemblyExportName : InheritableAttr,
TargetSpecificAttr<TargetWebAssembly> {
let Spellings = [Clang<"export_name">];
Expand Down
19 changes: 19 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2317,6 +2317,25 @@ section.
}];
}

// User-facing documentation for the `bpf_fastcall` attribute.
// Content is reStructuredText: inline literals must open and close with a
// matching pair of double backticks, otherwise the list items render broken.
def BPFFastCallDocs : Documentation {
  let Category = DocCatType;
  let Content = [{
Functions annotated with this attribute are likely to be inlined by BPF JIT.
It is assumed that inlined implementation uses less caller saved registers,
than a regular function.
Specifically, the following registers are likely to be preserved:
- ``R0`` if function return value is ``void``;
- ``R2-R5`` if function takes 1 argument;
- ``R3-R5`` if function takes 2 arguments;
- ``R4-R5`` if function takes 3 arguments;
- ``R5`` if function takes 4 arguments;

For such functions Clang generates code pattern that allows BPF JIT
to recognize and remove unnecessary spills and fills of the preserved
registers.
  }];
}

def MipsInterruptDocs : Documentation {
let Category = DocCatFunction;
let Heading = "interrupt (MIPS)";
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/CGCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2447,6 +2447,8 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
FuncAttrs.addAttribute(llvm::Attribute::NoCfCheck);
if (TargetDecl->hasAttr<LeafAttr>())
FuncAttrs.addAttribute(llvm::Attribute::NoCallback);
if (TargetDecl->hasAttr<BPFFastCallAttr>())
FuncAttrs.addAttribute("bpf_fastcall");

HasOptnone = TargetDecl->hasAttr<OptimizeNoneAttr>();
if (auto *AllocSize = TargetDecl->getAttr<AllocSizeAttr>()) {
Expand Down
24 changes: 24 additions & 0 deletions clang/test/CodeGen/bpf-attr-bpf-fastcall-1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// REQUIRES: bpf-registered-target
// RUN: %clang_cc1 -triple bpf -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s

#define __bpf_fastcall __attribute__((bpf_fastcall))

void test(void) __bpf_fastcall;
void (*ptr)(void) __bpf_fastcall;

// Exercises both ways of reaching a bpf_fastcall callee: a direct call to the
// annotated declaration `test`, and an indirect call through the annotated
// function pointer `ptr`.
void foo(void) {
test();
(*ptr)();
}

// CHECK: @ptr = global ptr null
// CHECK: define {{.*}} void @foo()
// CHECK: entry:
// CHECK: call void @test() #[[call_attr:[0-9]+]]
// CHECK: %[[ptr:.*]] = load ptr, ptr @ptr, align 8
// CHECK: call void %[[ptr]]() #[[call_attr]]
// CHECK: ret void

// CHECK: declare void @test() #[[func_attr:[0-9]+]]
// CHECK: attributes #[[func_attr]] = { {{.*}}"bpf_fastcall"{{.*}} }
// CHECK: attributes #[[call_attr]] = { "bpf_fastcall" }
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
// CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function)
// CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable))
// CHECK-NEXT: AvailableOnlyInDefaultEvalMethod (SubjectMatchRule_type_alias)
// CHECK-NEXT: BPFFastCall (SubjectMatchRule_hasType_functionType)
// CHECK-NEXT: BPFPreserveAccessIndex (SubjectMatchRule_record)
// CHECK-NEXT: BPFPreserveStaticOffset (SubjectMatchRule_record)
// CHECK-NEXT: BTFDeclTag (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record, SubjectMatchRule_field, SubjectMatchRule_type_alias)
Expand Down
14 changes: 14 additions & 0 deletions clang/test/Sema/bpf-attr-bpf-fastcall.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// REQUIRES: bpf-registered-target
// RUN: %clang_cc1 %s -triple bpf -verify

__attribute__((bpf_fastcall)) int var; // expected-warning {{'bpf_fastcall' attribute only applies to functions and function pointers}}

__attribute__((bpf_fastcall)) void func();
__attribute__((bpf_fastcall(1))) void func_invalid(); // expected-error {{'bpf_fastcall' attribute takes no arguments}}

void (*ptr1)(void) __attribute__((bpf_fastcall));
void (*ptr2)(void);
// Assigns between a pointer that carries the attribute (ptr1) and one that
// does not (ptr2), in both directions; neither assignment is expected to
// produce a diagnostic.
void foo(void) {
ptr2 = ptr1; // not an error
ptr1 = ptr2; // not an error
}
1 change: 1 addition & 0 deletions llvm/lib/Target/BPF/BPFCallingConv.td
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,4 @@ def CC_BPF32 : CallingConv<[
]>;

def CSR : CalleeSavedRegs<(add R6, R7, R8, R9, R10)>;
// Callee-saved set listing every register R0-R10. The generated
// CSR_PreserveAll_RegMask is what the BPF backend returns for
// CallingConv::PreserveAll.
def CSR_PreserveAll : CalleeSavedRegs<(add R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10)>;
31 changes: 31 additions & 0 deletions llvm/lib/Target/BPF/BPFISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,21 @@ SDValue BPFTargetLowering::LowerFormalArguments(

const size_t BPFTargetLowering::MaxArgs = 5;

/// Clear, in \p RegMask, the bit of every register enumerated by
/// TRI->subregs_inclusive(Reg).
///
/// \param TRI     register info used to enumerate the registers to clear.
/// \param RegMask mask stored as an array of 32-bit words; modified in place.
/// \param Reg     register whose bits are cleared.
static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
                            MCRegister Reg) {
  for (MCPhysReg Sub : TRI->subregs_inclusive(Reg)) {
    // Register number N lives in word N / 32, at bit position N % 32.
    const unsigned WordIdx = Sub / 32;
    const uint32_t BitInWord = 1u << (Sub % 32);
    RegMask[WordIdx] &= ~BitInWord;
  }
}

/// Allocate a fresh register mask from \p MF and fill it with a copy of
/// \p BaseRegMask, so that the copy can be modified without touching the
/// template.
///
/// \param TRI         supplies the register count that determines mask size.
/// \param MF          function whose allocator provides the new mask.
/// \param BaseRegMask mask to copy from.
/// \returns the newly allocated, writable copy.
static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
                                     MachineFunction &MF,
                                     const uint32_t *BaseRegMask) {
  // Number of 32-bit words needed to hold one bit per register.
  const unsigned NumWords = MachineOperand::getRegMaskSize(TRI->getNumRegs());
  uint32_t *Copy = MF.allocateRegMask();
  memcpy(Copy, BaseRegMask, NumWords * sizeof(Copy[0]));
  return Copy;
}

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
Expand Down Expand Up @@ -513,6 +528,22 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

bool HasFastCall =
(CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (HasFastCall) {
uint32_t *RegMask = regMaskFromTemplate(
TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
for (auto const &RegPair : RegsToPass)
resetRegMaskBit(TRI, RegMask, RegPair.first);
if (!CLI.CB->getType()->isVoidTy())
resetRegMaskBit(TRI, RegMask, BPF::R0);
Ops.push_back(DAG.getRegisterMask(RegMask));
} else {
Ops.push_back(
DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
}

if (InGlue.getNode())
Ops.push_back(InGlue);

Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Target/BPF/BPFInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -677,9 +677,7 @@ let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
}

// Jump and link
let isCall=1, hasDelaySlot=0, Uses = [R11],
// Potentially clobbered registers
Defs = [R0, R1, R2, R3, R4, R5] in {
let isCall=1, hasDelaySlot=0, Uses = [R11] in {
def JAL : CALL<"call">;
def JALX : CALLX<"callx">;
}
Expand Down
86 changes: 86 additions & 0 deletions llvm/lib/Target/BPF/BPFMIPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include "BPFInstrInfo.h"
#include "BPFTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
Expand Down Expand Up @@ -319,6 +321,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
bool in16BitRange(int Num);
bool eliminateRedundantMov();
bool adjustBranch();
bool insertMissingCallerSavedSpills();

public:

Expand All @@ -333,6 +336,7 @@ struct BPFMIPreEmitPeephole : public MachineFunctionPass {
Changed = eliminateRedundantMov();
if (SupportGotol)
Changed = adjustBranch() || Changed;
Changed |= insertMissingCallerSavedSpills();
return Changed;
}
};
Expand Down Expand Up @@ -596,6 +600,88 @@ bool BPFMIPreEmitPeephole::adjustBranch() {
return Changed;
}

// Registers R0-R5: the set examined around each call when looking for values
// that are live across the call and therefore need an explicit spill/fill.
static const unsigned CallerSavedRegs[] = {BPF::R0, BPF::R1, BPF::R2,
BPF::R3, BPF::R4, BPF::R5};

// Describes one call instruction that has caller saved registers live
// across it.
struct BPFFastCall {
// The call instruction itself.
MachineInstr *MI;
// Bit set of registers to spill/fill around MI: bit (1 << Reg) is set for
// each Reg from CallerSavedRegs that is live after the call and not defined
// by it.
unsigned LiveCallerSavedRegs;
};

static void collectBPFFastCalls(const TargetRegisterInfo *TRI,
LivePhysRegs &LiveRegs, MachineBasicBlock &BB,
SmallVectorImpl<BPFFastCall> &Calls) {
LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(BB);
Calls.clear();
for (MachineInstr &MI : llvm::reverse(BB)) {
if (MI.isCall()) {
unsigned LiveCallerSavedRegs = 0;
for (MCRegister R : CallerSavedRegs) {
bool DoSpillFill = !MI.definesRegister(R, TRI) && LiveRegs.contains(R);
if (!DoSpillFill)
continue;
LiveCallerSavedRegs |= 1 << R;
}
if (LiveCallerSavedRegs)
Calls.push_back({&MI, LiveCallerSavedRegs});
}
LiveRegs.stepBackward(MI);
}
}

/// Compute the lowest offset used by any live stack object, rounded down to
/// a multiple of \p SlotSize.
///
/// \param MFI      frame info whose objects are inspected.
/// \param SlotSize alignment of the result; the rounding uses a bit mask, so
///                 it presumably has to be a power of two (the only caller
///                 visible here passes 8).
/// \returns a value that is <= 0, <= every live object's offset, and a
///          multiple of \p SlotSize.
static int64_t computeMinFixedObjOffset(MachineFrameInfo &MFI,
                                        unsigned SlotSize) {
  int64_t Lowest = 0;
  // Same logic as in X86FrameLowering::adjustFrameForMsvcCxxEh()
  const int EndIdx = MFI.getObjectIndexEnd();
  for (int Idx = MFI.getObjectIndexBegin(); Idx < EndIdx; ++Idx) {
    if (!MFI.isDeadObjectIndex(Idx)) {
      const int64_t Off = MFI.getObjectOffset(Idx);
      if (Off < Lowest)
        Lowest = Off;
    }
  }
  // Lowest is never positive, so the remainder is in (-SlotSize, 0]; the
  // padding is the distance down to the next multiple of SlotSize.
  const int64_t Remainder = Lowest % SlotSize;
  const int64_t Padding = (SlotSize + Remainder) & (SlotSize - 1);
  return Lowest - Padding;
}

/// Insert explicit spill/fill pairs around calls that have caller saved
/// registers live across them.
///
/// For every basic block, collectBPFFastCalls() reports the calls after which
/// some of R0-R5 are live without being defined by the call. Each such
/// register is stored to its own 8-byte fixed spill slot right before the
/// call and loaded back right after it. Slots are placed below the offset
/// returned by computeMinFixedObjOffset(); the same sequence of offsets is
/// reused for every call in the function.
///
/// \returns true if at least one call needed spill/fill instructions.
bool BPFMIPreEmitPeephole::insertMissingCallerSavedSpills() {
  MachineFrameInfo &MFI = MF->getFrameInfo();
  SmallVector<BPFFastCall, 8> Calls;
  LivePhysRegs LiveRegs;
  const unsigned SlotSize = 8;
  int64_t MinFixedObjOffset = computeMinFixedObjOffset(MFI, SlotSize);
  bool Changed = false;
  for (MachineBasicBlock &BB : *MF) {
    collectBPFFastCalls(TRI, LiveRegs, BB, Calls);
    Changed |= !Calls.empty();
    for (BPFFastCall &Call : Calls) {
      // Every call starts allocating from the same base offset.
      int64_t CurOffset = MinFixedObjOffset;
      for (MCRegister Reg : CallerSavedRegs) {
        if (((1 << Reg) & Call.LiveCallerSavedRegs) == 0)
          continue;
        // Allocate stack object
        CurOffset -= SlotSize;
        MFI.CreateFixedSpillStackObject(SlotSize, CurOffset);
        // Generate spill: value register, then the (base, offset) address.
        // NOTE(review): assumes STD/LDD take exactly three operands
        // (register + base + offset), so no further immediate is appended;
        // confirm against the instruction definitions.
        BuildMI(BB, Call.MI->getIterator(), Call.MI->getDebugLoc(),
                TII->get(BPF::STD))
            .addReg(Reg)
            .addReg(BPF::R10)
            .addImm(CurOffset);
        // Generate fill. The loaded register is written by the instruction,
        // so it must be added as a definition rather than as a use. Each fill
        // is placed directly after the call, hence fills end up in the
        // reverse order of the spills.
        BuildMI(BB, ++Call.MI->getIterator(), Call.MI->getDebugLoc(),
                TII->get(BPF::LDD))
            .addReg(Reg, RegState::Define)
            .addReg(BPF::R10)
            .addImm(CurOffset);
      }
    }
  }
  return Changed;
}

} // end default namespace

INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/BPF/BPFRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,17 @@ BPFRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}

/// Select the register mask describing which registers survive a call made
/// with calling convention \p CC.
///
/// \returns CSR_PreserveAll_RegMask for CallingConv::PreserveAll and
///          CSR_RegMask for every other convention.
const uint32_t *
BPFRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                      CallingConv::ID CC) const {
  if (CC == CallingConv::PreserveAll)
    return CSR_PreserveAll_RegMask;
  return CSR_RegMask;
}

BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
markSuperRegs(Reserved, BPF::W10); // [W|R]10 is read only frame pointer
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/BPF/BPFRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ struct BPFRegisterInfo : public BPFGenRegisterInfo {

const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;

// Returns the mask of registers preserved across a call that uses the given
// calling convention.
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;

BitVector getReservedRegs(const MachineFunction &MF) const override;

bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
Expand Down
46 changes: 46 additions & 0 deletions llvm/test/CodeGen/BPF/bpf-fastcall-1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
; RUN: llc -O2 --march=bpfel %s -o - | FileCheck %s

; Generated from the following C code:
;
; #define __bpf_fastcall __attribute__((bpf_fastcall))
;
; void bar(void) __bpf_fastcall;
; void buz(long i, long j, long k);
;
; void foo(long i, long j, long k) {
; bar();
; buz(i, j, k);
; }
;
; Using the following command:
;
; clang --target=bpf -emit-llvm -O2 -S -o - t.c
;
; (unnecessary attrs removed manually)

; Check that function marked with bpf_fastcall does not clobber R1-R5.

; foo first calls bar, whose call site carries attribute group #1 (which
; includes "bpf_fastcall"), and then forwards its own three arguments to the
; ordinary function buz.
define dso_local void @foo(i64 noundef %i, i64 noundef %j, i64 noundef %k) {
entry:
tail call void @bar() #1
tail call void @buz(i64 noundef %i, i64 noundef %j, i64 noundef %k)
ret void
}

; CHECK: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: *(u64 *)(r10 - 8) = r1
; CHECK-NEXT: *(u64 *)(r10 - 16) = r2
; CHECK-NEXT: *(u64 *)(r10 - 24) = r3
; CHECK-NEXT: call bar
; CHECK-NEXT: r3 = *(u64 *)(r10 - 24)
; CHECK-NEXT: r2 = *(u64 *)(r10 - 16)
; CHECK-NEXT: r1 = *(u64 *)(r10 - 8)
; CHECK-NEXT: call buz
; CHECK-NEXT: exit

declare dso_local void @bar() #0
declare dso_local void @buz(i64 noundef, i64 noundef, i64 noundef)

attributes #0 = { "bpf_fastcall" }
attributes #1 = { nounwind "bpf_fastcall" }
Loading
Loading