Skip to content

Commit 63f9829

Browse files
authored
[SPARC] Prefer RDPC over CALL to implement GETPCX for 64-bit target (#77196)
On 64-bit targets, prefer using RDPC over CALL to get the value of %pc. This is faster on modern processors (Niagara T1 and newer) and avoids polluting the processor's predictor state. The old behavior of using a fake CALL is still done when tuning for classic UltraSPARC processors, since RDPC is much slower there. A quick pgbench test on a SPARC T4 shows about 2% speedup on SELECT loads, and about 7% speedup on INSERT/UPDATE loads.
1 parent 72990df commit 63f9829

File tree

4 files changed

+138
-7
lines changed

4 files changed

+138
-7
lines changed

llvm/lib/Target/Sparc/Sparc.td

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
6262
def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
6363
"Use software emulation for floating point">;
6464

65+
//===----------------------------------------------------------------------===//
66+
// SPARC Subtarget tuning features.
67+
//
68+
69+
def TuneSlowRDPC : SubtargetFeature<"slow-rdpc", "HasSlowRDPC", "true",
70+
"rd %pc, %XX is slow", [FeatureV9]>;
71+
6572
//==== Features added predominantly for LEON subtarget support
6673
include "LeonFeatures.td"
6774

@@ -89,8 +96,9 @@ def SparcAsmParserVariant : AsmParserVariant {
8996
// SPARC processors supported.
9097
//===----------------------------------------------------------------------===//
9198

92-
class Proc<string Name, list<SubtargetFeature> Features>
93-
: Processor<Name, NoItineraries, Features>;
99+
class Proc<string Name, list<SubtargetFeature> Features,
100+
list<SubtargetFeature> TuneFeatures = []>
101+
: Processor<Name, NoItineraries, Features, TuneFeatures>;
94102

95103
def : Proc<"generic", []>;
96104
def : Proc<"v7", [FeatureSoftMulDiv, FeatureNoFSMULD]>;
@@ -118,9 +126,11 @@ def : Proc<"ma2480", [FeatureLeon, LeonCASA]>;
118126
def : Proc<"ma2485", [FeatureLeon, LeonCASA]>;
119127
def : Proc<"ma2x8x", [FeatureLeon, LeonCASA]>;
120128
def : Proc<"v9", [FeatureV9]>;
121-
def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
129+
def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS],
130+
[TuneSlowRDPC]>;
122131
def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
123-
FeatureVIS2]>;
132+
FeatureVIS2],
133+
[TuneSlowRDPC]>;
124134
def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
125135
FeatureVIS2]>;
126136
def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc,

llvm/lib/Target/Sparc/SparcAsmPrinter.cpp

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
#include "MCTargetDesc/SparcInstPrinter.h"
1515
#include "MCTargetDesc/SparcMCExpr.h"
16+
#include "MCTargetDesc/SparcMCTargetDesc.h"
1617
#include "MCTargetDesc/SparcTargetStreamer.h"
1718
#include "Sparc.h"
1819
#include "SparcInstrInfo.h"
@@ -111,6 +112,15 @@ static void EmitCall(MCStreamer &OutStreamer,
111112
OutStreamer.emitInstruction(CallInst, STI);
112113
}
113114

115+
static void EmitRDPC(MCStreamer &OutStreamer, MCOperand &RD,
116+
const MCSubtargetInfo &STI) {
117+
MCInst RDPCInst;
118+
RDPCInst.setOpcode(SP::RDASR);
119+
RDPCInst.addOperand(RD);
120+
RDPCInst.addOperand(MCOperand::createReg(SP::ASR5));
121+
OutStreamer.emitInstruction(RDPCInst, STI);
122+
}
123+
114124
static void EmitSETHI(MCStreamer &OutStreamer,
115125
MCOperand &Imm, MCOperand &RD,
116126
const MCSubtargetInfo &STI)
@@ -226,16 +236,25 @@ void SparcAsmPrinter::LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
226236
MCOperand RegO7 = MCOperand::createReg(SP::O7);
227237

228238
// <StartLabel>:
229-
// call <EndLabel>
239+
// <GET-PC> // This will be either `call <EndLabel>` or `rd %pc, %o7`.
230240
// <SethiLabel>:
231241
// sethi %hi(_GLOBAL_OFFSET_TABLE_+(<SethiLabel>-<StartLabel>)), <MO>
232242
// <EndLabel>:
233243
// or <MO>, %lo(_GLOBAL_OFFSET_TABLE_+(<EndLabel>-<StartLabel>)), <MO>
234244
// add <MO>, %o7, <MO>
235245

236246
OutStreamer->emitLabel(StartLabel);
237-
MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
238-
EmitCall(*OutStreamer, Callee, STI);
247+
if (!STI.getTargetTriple().isSPARC64() ||
248+
STI.hasFeature(Sparc::TuneSlowRDPC)) {
249+
MCOperand Callee = createPCXCallOP(EndLabel, OutContext);
250+
EmitCall(*OutStreamer, Callee, STI);
251+
} else {
252+
// TODO find out whether it is possible to store PC
253+
// in other registers, to enable leaf function optimization.
254+
// (On the other hand, approx. over 97.8% of GETPCXes happen
255+
// in non-leaf functions, so would this be worth the effort?)
256+
EmitRDPC(*OutStreamer, RegO7, STI);
257+
}
239258
OutStreamer->emitLabel(SethiLabel);
240259
MCOperand hiImm = createPCXRelExprOp(SparcMCExpr::VK_Sparc_PC22,
241260
GOTLabel, StartLabel, SethiLabel,
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
3+
; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
4+
5+
;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
6+
;; with a fake `call`.
7+
;; All other SPARC64 targets implement it with `rd %pc, %o7`.
8+
;; Need to do the tests in separate files because apparently `tune-cpu`
9+
;; attribute applies to the entire file at once.
10+
11+
@value = external global i32
12+
13+
define i32 @testCall() nounwind #0 {
14+
; SPARC-LABEL: testCall:
15+
; SPARC: ! %bb.0:
16+
; SPARC-NEXT: save %sp, -96, %sp
17+
; SPARC-NEXT: .Ltmp0:
18+
; SPARC-NEXT: call .Ltmp1
19+
; SPARC-NEXT: .Ltmp2:
20+
; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
21+
; SPARC-NEXT: .Ltmp1:
22+
; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
23+
; SPARC-NEXT: add %i0, %o7, %i0
24+
; SPARC-NEXT: sethi %hi(value), %i1
25+
; SPARC-NEXT: add %i1, %lo(value), %i1
26+
; SPARC-NEXT: ld [%i0+%i1], %i0
27+
; SPARC-NEXT: ld [%i0], %i0
28+
; SPARC-NEXT: ret
29+
; SPARC-NEXT: restore
30+
;
31+
; SPARC64-LABEL: testCall:
32+
; SPARC64: ! %bb.0:
33+
; SPARC64-NEXT: save %sp, -128, %sp
34+
; SPARC64-NEXT: .Ltmp0:
35+
; SPARC64-NEXT: call .Ltmp1
36+
; SPARC64-NEXT: .Ltmp2:
37+
; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
38+
; SPARC64-NEXT: .Ltmp1:
39+
; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
40+
; SPARC64-NEXT: add %i0, %o7, %i0
41+
; SPARC64-NEXT: sethi %hi(value), %i1
42+
; SPARC64-NEXT: add %i1, %lo(value), %i1
43+
; SPARC64-NEXT: ldx [%i0+%i1], %i0
44+
; SPARC64-NEXT: ld [%i0], %i0
45+
; SPARC64-NEXT: ret
46+
; SPARC64-NEXT: restore
47+
%1 = load i32, ptr @value
48+
ret i32 %1
49+
}
50+
51+
attributes #0 = { "tune-cpu"="ultrasparc" }
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -relocation-model=pic -mtriple=sparc | FileCheck --check-prefix=SPARC %s
3+
; RUN: llc < %s -relocation-model=pic -mtriple=sparcv9 | FileCheck --check-prefix=SPARC64 %s
4+
5+
;; SPARC32 and SPARC64 for classic UltraSPARCs implement GETPCX
6+
;; with a fake `call`.
7+
;; All other SPARC64 targets implement it with `rd %pc, %o7`.
8+
;; Need to do the tests in separate files because apparently `tune-cpu`
9+
;; attribute applies to the entire file at once.
10+
11+
@value = external global i32
12+
13+
define i32 @testRdpc() nounwind #0 {
14+
; SPARC-LABEL: testRdpc:
15+
; SPARC: ! %bb.0:
16+
; SPARC-NEXT: save %sp, -96, %sp
17+
; SPARC-NEXT: .Ltmp0:
18+
; SPARC-NEXT: call .Ltmp1
19+
; SPARC-NEXT: .Ltmp2:
20+
; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
21+
; SPARC-NEXT: .Ltmp1:
22+
; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
23+
; SPARC-NEXT: add %i0, %o7, %i0
24+
; SPARC-NEXT: sethi %hi(value), %i1
25+
; SPARC-NEXT: add %i1, %lo(value), %i1
26+
; SPARC-NEXT: ld [%i0+%i1], %i0
27+
; SPARC-NEXT: ld [%i0], %i0
28+
; SPARC-NEXT: ret
29+
; SPARC-NEXT: restore
30+
;
31+
; SPARC64-LABEL: testRdpc:
32+
; SPARC64: ! %bb.0:
33+
; SPARC64-NEXT: save %sp, -128, %sp
34+
; SPARC64-NEXT: .Ltmp0:
35+
; SPARC64-NEXT: rd %pc, %o7
36+
; SPARC64-NEXT: .Ltmp2:
37+
; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
38+
; SPARC64-NEXT: .Ltmp1:
39+
; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
40+
; SPARC64-NEXT: add %i0, %o7, %i0
41+
; SPARC64-NEXT: sethi %hi(value), %i1
42+
; SPARC64-NEXT: add %i1, %lo(value), %i1
43+
; SPARC64-NEXT: ldx [%i0+%i1], %i0
44+
; SPARC64-NEXT: ld [%i0], %i0
45+
; SPARC64-NEXT: ret
46+
; SPARC64-NEXT: restore
47+
%1 = load i32, ptr @value
48+
ret i32 %1
49+
}
50+
51+
attributes #0 = { "tune-cpu"="niagara" }

0 commit comments

Comments
 (0)