Skip to content

Commit 98d3e3f

Browse files
author
Yonghong Song
committed
Revert "BPF: Ensure __sync_fetch_and_add() always generate atomic_fetch_add insn (#101428)"
This reverts commit c566769. See discussion in [1]. Currently, with -mcpu=v1/v2, atomic_fetch_add() insn is generated for (void)__sync_fetch_and_add(...). This breaks backward compatibility since there are codes who runs on old system (< 5.12) which does not support atomci_fetch_add(). Now let revert previous comment ([1]) and add additional logic in the next patch to ensure for (void)__sync_fetch_and_add(...), v1/v2: generate locked add insn >= v3: generate atomic_fetch_add insn [1] #101428
1 parent 438ad9f commit 98d3e3f

File tree

10 files changed

+344
-28
lines changed

10 files changed

+344
-28
lines changed

llvm/lib/Target/BPF/BPF.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
2828
FunctionPass *createBPFMISimplifyPatchablePass();
2929
FunctionPass *createBPFMIPeepholePass();
3030
FunctionPass *createBPFMIPreEmitPeepholePass();
31+
FunctionPass *createBPFMIPreEmitCheckingPass();
3132

3233
InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3334
const BPFSubtarget &,
@@ -36,6 +37,7 @@ InstructionSelector *createBPFInstructionSelector(const BPFTargetMachine &,
3637
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
3738
void initializeBPFDAGToDAGISelLegacyPass(PassRegistry &);
3839
void initializeBPFMIPeepholePass(PassRegistry &);
40+
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
3941
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
4042
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
4143

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -787,12 +787,12 @@ let Predicates = [BPFNoALU32] in {
787787
}
788788

789789
// Atomic XADD for BPFNoALU32
790-
class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
790+
class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
791791
: TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
792792
(outs GPR:$dst),
793793
(ins MEMri:$addr, GPR:$val),
794794
"lock *("#OpcodeStr#" *)($addr) += $val",
795-
[]> {
795+
[(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
796796
bits<4> dst;
797797
bits<20> addr;
798798

@@ -803,6 +803,12 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr>
803803
let BPFClass = BPF_STX;
804804
}
805805

806+
let Constraints = "$dst = $val" in {
807+
let Predicates = [BPFNoALU32] in {
808+
def XADDW : XADD<BPF_W, "u32", atomic_load_add_i32>;
809+
}
810+
}
811+
806812
// Atomic add, and, or, xor
807813
class ATOMIC_NOFETCH<BPFArithOp Opc, string Opstr>
808814
: TYPE_LD_ST<BPF_ATOMIC.Value, BPF_DW.Value,
@@ -887,13 +893,6 @@ class XFALU32<BPFWidthModifer SizeOp, BPFArithOp Opc, string OpcodeStr,
887893
let BPFClass = BPF_STX;
888894
}
889895

890-
let Constraints = "$dst = $val" in {
891-
let Predicates = [BPFNoALU32] in {
892-
def XADDW : XADD<BPF_W, "u32">;
893-
def XFADDW : XFALU64<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;
894-
}
895-
}
896-
897896
let Constraints = "$dst = $val" in {
898897
let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
899898
def XFADDW32 : XFALU32<BPF_W, BPF_ADD, "u32", "add", atomic_load_add_i32>;

llvm/lib/Target/BPF/BPFMIChecking.cpp

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
//===-------------- BPFMIChecking.cpp - MI Checking Legality -------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass performs checking to signal errors for certain illegal usages at
10+
// MachineInstruction layer. Specially, the result of XADD{32,64} insn should
11+
// not be used. The pass is done at the PreEmit pass right before the
12+
// machine code is emitted at which point the register liveness information
13+
// is still available.
14+
//
15+
//===----------------------------------------------------------------------===//
16+
17+
#include "BPF.h"
18+
#include "BPFInstrInfo.h"
19+
#include "BPFTargetMachine.h"
20+
#include "llvm/CodeGen/MachineFunctionPass.h"
21+
#include "llvm/CodeGen/MachineInstrBuilder.h"
22+
#include "llvm/CodeGen/MachineRegisterInfo.h"
23+
#include "llvm/IR/DiagnosticInfo.h"
24+
#include "llvm/Support/Debug.h"
25+
26+
using namespace llvm;
27+
28+
#define DEBUG_TYPE "bpf-mi-checking"
29+
30+
namespace {
31+
32+
struct BPFMIPreEmitChecking : public MachineFunctionPass {
33+
34+
static char ID;
35+
MachineFunction *MF;
36+
const TargetRegisterInfo *TRI;
37+
38+
BPFMIPreEmitChecking() : MachineFunctionPass(ID) {
39+
initializeBPFMIPreEmitCheckingPass(*PassRegistry::getPassRegistry());
40+
}
41+
42+
private:
43+
// Initialize class variables.
44+
void initialize(MachineFunction &MFParm);
45+
46+
bool processAtomicInsts();
47+
48+
public:
49+
50+
// Main entry point for this pass.
51+
bool runOnMachineFunction(MachineFunction &MF) override {
52+
if (!skipFunction(MF.getFunction())) {
53+
initialize(MF);
54+
return processAtomicInsts();
55+
}
56+
return false;
57+
}
58+
};
59+
60+
// Initialize class variables.
61+
void BPFMIPreEmitChecking::initialize(MachineFunction &MFParm) {
62+
MF = &MFParm;
63+
TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
64+
LLVM_DEBUG(dbgs() << "*** BPF PreEmit checking pass ***\n\n");
65+
}
66+
67+
// Make sure all Defs of XADD are dead, meaning any result of XADD insn is not
68+
// used.
69+
//
70+
// NOTE: BPF backend hasn't enabled sub-register liveness track, so when the
71+
// source and destination operands of XADD are GPR32, there is no sub-register
72+
// dead info. If we rely on the generic MachineInstr::allDefsAreDead, then we
73+
// will raise false alarm on GPR32 Def.
74+
//
75+
// To support GPR32 Def, ideally we could just enable sub-registr liveness track
76+
// on BPF backend, then allDefsAreDead could work on GPR32 Def. This requires
77+
// implementing TargetSubtargetInfo::enableSubRegLiveness on BPF.
78+
//
79+
// However, sub-register liveness tracking module inside LLVM is actually
80+
// designed for the situation where one register could be split into more than
81+
// one sub-registers for which case each sub-register could have their own
82+
// liveness and kill one of them doesn't kill others. So, tracking liveness for
83+
// each make sense.
84+
//
85+
// For BPF, each 64-bit register could only have one 32-bit sub-register. This
86+
// is exactly the case which LLVM think brings no benefits for doing
87+
// sub-register tracking, because the live range of sub-register must always
88+
// equal to its parent register, therefore liveness tracking is disabled even
89+
// the back-end has implemented enableSubRegLiveness. The detailed information
90+
// is at r232695:
91+
//
92+
// Author: Matthias Braun <[email protected]>
93+
// Date: Thu Mar 19 00:21:58 2015 +0000
94+
// Do not track subregister liveness when it brings no benefits
95+
//
96+
// Hence, for BPF, we enhance MachineInstr::allDefsAreDead. Given the solo
97+
// sub-register always has the same liveness as its parent register, LLVM is
98+
// already attaching a implicit 64-bit register Def whenever the there is
99+
// a sub-register Def. The liveness of the implicit 64-bit Def is available.
100+
// For example, for "lock *(u32 *)(r0 + 4) += w9", the MachineOperand info could
101+
// be:
102+
//
103+
// $w9 = XADDW32 killed $r0, 4, $w9(tied-def 0),
104+
// implicit killed $r9, implicit-def dead $r9
105+
//
106+
// Even though w9 is not marked as Dead, the parent register r9 is marked as
107+
// Dead correctly, and it is safe to use such information or our purpose.
108+
static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
109+
const MCRegisterClass *GPR64RegClass =
110+
&BPFMCRegisterClasses[BPF::GPRRegClassID];
111+
std::vector<unsigned> GPR32LiveDefs;
112+
std::vector<unsigned> GPR64DeadDefs;
113+
114+
for (const MachineOperand &MO : MI.operands()) {
115+
bool RegIsGPR64;
116+
117+
if (!MO.isReg() || MO.isUse())
118+
continue;
119+
120+
RegIsGPR64 = GPR64RegClass->contains(MO.getReg());
121+
if (!MO.isDead()) {
122+
// It is a GPR64 live Def, we are sure it is live. */
123+
if (RegIsGPR64)
124+
return true;
125+
// It is a GPR32 live Def, we are unsure whether it is really dead due to
126+
// no sub-register liveness tracking. Push it to vector for deferred
127+
// check.
128+
GPR32LiveDefs.push_back(MO.getReg());
129+
continue;
130+
}
131+
132+
// Record any GPR64 dead Def as some unmarked GPR32 could be alias of its
133+
// low 32-bit.
134+
if (RegIsGPR64)
135+
GPR64DeadDefs.push_back(MO.getReg());
136+
}
137+
138+
// No GPR32 live Def, safe to return false.
139+
if (GPR32LiveDefs.empty())
140+
return false;
141+
142+
// No GPR64 dead Def, so all those GPR32 live Def can't have alias, therefore
143+
// must be truely live, safe to return true.
144+
if (GPR64DeadDefs.empty())
145+
return true;
146+
147+
// Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
148+
for (auto I : GPR32LiveDefs)
149+
for (MCPhysReg SR : TRI->superregs(I))
150+
if (!llvm::is_contained(GPR64DeadDefs, SR))
151+
return true;
152+
153+
return false;
154+
}
155+
156+
bool BPFMIPreEmitChecking::processAtomicInsts() {
157+
for (MachineBasicBlock &MBB : *MF) {
158+
for (MachineInstr &MI : MBB) {
159+
if (MI.getOpcode() != BPF::XADDW &&
160+
MI.getOpcode() != BPF::XADDD &&
161+
MI.getOpcode() != BPF::XADDW32)
162+
continue;
163+
164+
LLVM_DEBUG(MI.dump());
165+
if (hasLiveDefs(MI, TRI)) {
166+
DebugLoc Empty;
167+
const DebugLoc &DL = MI.getDebugLoc();
168+
const Function &F = MF->getFunction();
169+
F.getContext().diagnose(DiagnosticInfoUnsupported{
170+
F, "Invalid usage of the XADD return value", DL});
171+
}
172+
}
173+
}
174+
175+
// Check return values of atomic_fetch_and_{add,and,or,xor}.
176+
// If the return is not used, the atomic_fetch_and_<op> instruction
177+
// is replaced with atomic_<op> instruction.
178+
MachineInstr *ToErase = nullptr;
179+
bool Changed = false;
180+
const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
181+
for (MachineBasicBlock &MBB : *MF) {
182+
for (MachineInstr &MI : MBB) {
183+
if (ToErase) {
184+
ToErase->eraseFromParent();
185+
ToErase = nullptr;
186+
}
187+
188+
if (MI.getOpcode() != BPF::XFADDW32 && MI.getOpcode() != BPF::XFADDD &&
189+
MI.getOpcode() != BPF::XFANDW32 && MI.getOpcode() != BPF::XFANDD &&
190+
MI.getOpcode() != BPF::XFXORW32 && MI.getOpcode() != BPF::XFXORD &&
191+
MI.getOpcode() != BPF::XFORW32 && MI.getOpcode() != BPF::XFORD)
192+
continue;
193+
194+
if (hasLiveDefs(MI, TRI))
195+
continue;
196+
197+
LLVM_DEBUG(dbgs() << "Transforming "; MI.dump());
198+
unsigned newOpcode;
199+
switch (MI.getOpcode()) {
200+
case BPF::XFADDW32:
201+
newOpcode = BPF::XADDW32;
202+
break;
203+
case BPF::XFADDD:
204+
newOpcode = BPF::XADDD;
205+
break;
206+
case BPF::XFANDW32:
207+
newOpcode = BPF::XANDW32;
208+
break;
209+
case BPF::XFANDD:
210+
newOpcode = BPF::XANDD;
211+
break;
212+
case BPF::XFXORW32:
213+
newOpcode = BPF::XXORW32;
214+
break;
215+
case BPF::XFXORD:
216+
newOpcode = BPF::XXORD;
217+
break;
218+
case BPF::XFORW32:
219+
newOpcode = BPF::XORW32;
220+
break;
221+
case BPF::XFORD:
222+
newOpcode = BPF::XORD;
223+
break;
224+
default:
225+
llvm_unreachable("Incorrect Atomic Instruction Opcode");
226+
}
227+
228+
BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(newOpcode))
229+
.add(MI.getOperand(0))
230+
.add(MI.getOperand(1))
231+
.add(MI.getOperand(2))
232+
.add(MI.getOperand(3));
233+
234+
ToErase = &MI;
235+
Changed = true;
236+
}
237+
}
238+
239+
return Changed;
240+
}
241+
242+
} // end default namespace
243+
244+
INITIALIZE_PASS(BPFMIPreEmitChecking, "bpf-mi-pemit-checking",
245+
"BPF PreEmit Checking", false, false)
246+
247+
char BPFMIPreEmitChecking::ID = 0;
248+
FunctionPass* llvm::createBPFMIPreEmitCheckingPass()
249+
{
250+
return new BPFMIPreEmitChecking();
251+
}

llvm/lib/Target/BPF/BPFTargetMachine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ void BPFPassConfig::addMachineSSAOptimization() {
178178
}
179179

180180
void BPFPassConfig::addPreEmitPass() {
181+
addPass(createBPFMIPreEmitCheckingPass());
181182
if (getOptLevel() != CodeGenOptLevel::None)
182183
if (!DisableMIPeephole)
183184
addPass(createBPFMIPreEmitPeepholePass());

llvm/lib/Target/BPF/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ add_llvm_target(BPFCodeGen
3939
BPFSubtarget.cpp
4040
BPFTargetMachine.cpp
4141
BPFMIPeephole.cpp
42+
BPFMIChecking.cpp
4243
BPFMISimplifyPatchable.cpp
4344
BTFDebug.cpp
4445

llvm/test/CodeGen/BPF/atomics.ll

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,22 @@
1-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck --check-prefixes=CHECK,CHECK-V2 %s
2-
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefixes=CHECK,CHECK-V3 %s
1+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding | FileCheck %s
2+
; RUN: llc < %s -march=bpfel -verify-machineinstrs -show-mc-encoding -mcpu=v3 | FileCheck --check-prefix=CHECK-V3 %s
33

44
; CHECK-LABEL: test_load_add_32
5-
; CHECK-V2: r2 = atomic_fetch_add((u32 *)(r1 + 0), r2)
6-
; CHECK-V3: w2 = atomic_fetch_add((u32 *)(r1 + 0), w2)
7-
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
5+
; CHECK: lock *(u32 *)(r1 + 0) += r2
6+
; CHECK: encoding: [0xc3,0x21
7+
; CHECK-V3: lock *(u32 *)(r1 + 0) += w2
8+
; CHECK-V3: encoding: [0xc3,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
89
define void @test_load_add_32(ptr %p, i32 zeroext %v) {
910
entry:
1011
atomicrmw add ptr %p, i32 %v seq_cst
1112
ret void
1213
}
1314

1415
; CHECK-LABEL: test_load_add_64
15-
; CHECK: r2 = atomic_fetch_add((u64 *)(r1 + 0), r2)
16-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x01,0x00,0x00,0x00]
16+
; CHECK: lock *(u64 *)(r1 + 0) += r2
17+
; CHECK: encoding: [0xdb,0x21
18+
; CHECK-V3: lock *(u64 *)(r1 + 0) += r2
19+
; CHECK-V3: encoding: [0xdb,0x21,0x00,0x00,0x00,0x00,0x00,0x00]
1720
define void @test_load_add_64(ptr %p, i64 zeroext %v) {
1821
entry:
1922
atomicrmw add ptr %p, i64 %v seq_cst

llvm/test/CodeGen/BPF/atomics_2.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ entry:
214214
}
215215

216216
; CHECK-LABEL: test_atomic_xor_32
217-
; CHECK: w2 = atomic_fetch_xor((u32 *)(r1 + 0), w2)
218-
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
217+
; CHECK: lock *(u32 *)(r1 + 0) ^= w2
218+
; CHECK: encoding: [0xc3,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
219219
; CHECK: w0 = 0
220220
define dso_local i32 @test_atomic_xor_32(ptr nocapture %p, i32 %v) local_unnamed_addr {
221221
entry:
@@ -224,8 +224,8 @@ entry:
224224
}
225225

226226
; CHECK-LABEL: test_atomic_xor_64
227-
; CHECK: r2 = atomic_fetch_xor((u64 *)(r1 + 0), r2)
228-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa1,0x00,0x00,0x00]
227+
; CHECK: lock *(u64 *)(r1 + 0) ^= r2
228+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0xa0,0x00,0x00,0x00]
229229
; CHECK: w0 = 0
230230
define dso_local i32 @test_atomic_xor_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
231231
entry:
@@ -234,8 +234,8 @@ entry:
234234
}
235235

236236
; CHECK-LABEL: test_atomic_and_64
237-
; CHECK: r2 = atomic_fetch_and((u64 *)(r1 + 0), r2)
238-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x51,0x00,0x00,0x00]
237+
; CHECK: lock *(u64 *)(r1 + 0) &= r2
238+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x50,0x00,0x00,0x00]
239239
; CHECK: w0 = 0
240240
define dso_local i32 @test_atomic_and_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
241241
entry:
@@ -244,8 +244,8 @@ entry:
244244
}
245245

246246
; CHECK-LABEL: test_atomic_or_64
247-
; CHECK: r2 = atomic_fetch_or((u64 *)(r1 + 0), r2)
248-
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x41,0x00,0x00,0x00]
247+
; CHECK: lock *(u64 *)(r1 + 0) |= r2
248+
; CHECK: encoding: [0xdb,0x21,0x00,0x00,0x40,0x00,0x00,0x00]
249249
; CHECK: w0 = 0
250250
define dso_local i32 @test_atomic_or_64(ptr nocapture %p, i64 %v) local_unnamed_addr {
251251
entry:

0 commit comments

Comments
 (0)