Skip to content

Commit 6b85d8e

Browse files
committed
[WIP][LLVM] Add __builtin_readfixedtimer intrinsic and builtin
Summary: This patch adds a new intrinsic and builtin function mirroring the existing `__builtin_readcyclecounter`. The difference is that this implementation targets a separate counter, available on some targets, that returns a fixed-frequency clock which can be used to determine elapsed time. This differs from the cycle counter, which often has a variable frequency. This is currently only supported for the NVPTX and AMDGPU targets.
1 parent 9673741 commit 6b85d8e

26 files changed

+155
-7
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,6 +2764,37 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
27642764
that even if present, its use may depend on run-time privilege or other OS
27652765
controlled state.
27662766
2767+
``__builtin_readfixedtimer``
2768+
------------------------------
2769+
2770+
``__builtin_readfixedtimer`` is used to access the fixed frequency counter
2771+
register (or a similar steady-rate clock) on those targets that support it.
2772+
The function is similar to ``__builtin_readcyclecounter`` above except that the
2773+
frequency is fixed, making it suitable for measuring elapsed time.
2774+
2775+
**Syntax**:
2776+
2777+
.. code-block:: c++
2778+
2779+
__builtin_readfixedtimer()
2780+
2781+
**Example of Use**:
2782+
2783+
.. code-block:: c++
2784+
2785+
unsigned long long t0 = __builtin_readfixedtimer();
2786+
do_something();
2787+
unsigned long long t1 = __builtin_readfixedtimer();
2788+
unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
2789+
2790+
**Description**:
2791+
2792+
The ``__builtin_readfixedtimer()`` builtin returns the fixed frequency counter value.
2793+
When not supported by the target, the return value is always zero. This builtin
2794+
takes no arguments and produces an unsigned long long result.
2795+
2796+
Query for this feature with ``__has_builtin(__builtin_readfixedtimer)``.
2797+
27672798
``__builtin_dump_struct``
27682799
-------------------------
27692800

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
11101110
let Prototype = "unsigned long long int()";
11111111
}
11121112

1113+
def ReadFixedTimer : Builtin {
1114+
let Spellings = ["__builtin_readfixedtimer"];
1115+
let Attributes = [NoThrow];
1116+
let Prototype = "unsigned long long int()";
1117+
}
1118+
11131119
def Trap : Builtin {
11141120
let Spellings = ["__builtin_trap"];
11151121
let Attributes = [NoThrow, NoReturn];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
34433443
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
34443444
return RValue::get(Builder.CreateCall(F));
34453445
}
3446+
case Builtin::BI__builtin_readfixedtimer: {
3447+
Function *F = CGM.getIntrinsic(Intrinsic::readfixedtimer);
3448+
return RValue::get(Builder.CreateCall(F));
3449+
}
34463450
case Builtin::BI__builtin___clear_cache: {
34473451
Value *Begin = EmitScalarExpr(E->getArg(0));
34483452
Value *End = EmitScalarExpr(E->getArg(1));

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,12 @@ enum NodeType {
11791179
/// counter-like register (or other high accuracy low latency clock source).
11801180
READCYCLECOUNTER,
11811181

1182+
/// READFIXEDTIMER - This corresponds to the readfixedtimer intrinsic.
1183+
/// It has the same semantics as the READCYCLECOUNTER implementation except
1184+
/// that the result is the content of the architecture-specific fixed
1185+
/// frequency counter suitable for measuring elapsed time.
1186+
READFIXEDTIMER,
1187+
11821188
/// HANDLENODE node - Used as a handle for various purposes.
11831189
HANDLENODE,
11841190

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
870870

871871
def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
872872

873+
def int_readfixedtimer : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
874+
873875
// The assume intrinsic is marked InaccessibleMemOnly so that proper control
874876
// dependencies will be maintained.
875877
def int_assume : DefaultAttrsIntrinsic<

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
352352
/// INTRINSIC readcyclecounter
353353
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
354354

355+
/// INTRINSIC readfixedtimer
356+
HANDLE_TARGET_OPCODE(G_READFIXEDTIMER)
357+
355358
/// Generic load (including anyext load)
356359
HANDLE_TARGET_OPCODE(G_LOAD)
357360

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction {
11011101
let hasSideEffects = true;
11021102
}
11031103

1104+
def G_READFIXEDTIMER : GenericInstruction {
1105+
let OutOperandList = (outs type0:$dst);
1106+
let InOperandList = (ins);
1107+
let hasSideEffects = true;
1108+
}
1109+
11041110
//------------------------------------------------------------------------------
11051111
// Memory ops
11061112
//------------------------------------------------------------------------------

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
167167
def : GINodeEquiv<G_FMAXIMUM, fmaximum>;
168168
def : GINodeEquiv<G_FMINIMUM, fminimum>;
169169
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
170+
def : GINodeEquiv<G_READFIXEDTIMER, readfixedtimer>;
170171
def : GINodeEquiv<G_ROTR, rotr>;
171172
def : GINodeEquiv<G_ROTL, rotl>;
172173
def : GINodeEquiv<G_LROUND, lround>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,9 @@ def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
657657
def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf,
658658
[SDNPHasChain, SDNPSideEffect]>;
659659

660+
def readfixedtimer : SDNode<"ISD::READFIXEDTIMER", SDTIntLeaf,
661+
[SDNPHasChain, SDNPSideEffect]>;
662+
660663
def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone,
661664
[SDNPHasChain, SDNPSideEffect]>;
662665

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
18851885
return TargetOpcode::G_INTRINSIC_TRUNC;
18861886
case Intrinsic::readcyclecounter:
18871887
return TargetOpcode::G_READCYCLECOUNTER;
1888+
case Intrinsic::readfixedtimer:
1889+
return TargetOpcode::G_READFIXEDTIMER;
18881890
case Intrinsic::ptrmask:
18891891
return TargetOpcode::G_PTRMASK;
18901892
case Intrinsic::lrint:

llvm/lib/CodeGen/IntrinsicLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
312312
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
313313
break;
314314
}
315+
case Intrinsic::readfixedtimer: {
316+
errs() << "WARNING: this target does not support the llvm.readfixedtimer"
317+
<< " intrinsic. It is being lowered to a constant 0\n";
318+
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
319+
break;
320+
}
315321

316322
case Intrinsic::dbg_declare:
317323
case Intrinsic::dbg_label:

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,8 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
11271127
Action = TargetLowering::Custom;
11281128
break;
11291129
case ISD::READCYCLECOUNTER:
1130-
// READCYCLECOUNTER returns an i64, even if type legalization might have
1131-
// expanded that to several smaller types.
1130+
case ISD::READFIXEDTIMER:
1131+
// READCYCLECOUNTER and READFIXEDTIMER return an i64, even if type
1132+
// legalization might have expanded that to several smaller types.
11321133
Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
11331134
break;
11341135
case ISD::READ_REGISTER:
@@ -3080,6 +3081,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
30803081
Results.push_back(Node->getOperand(0));
30813082
break;
30823083
case ISD::READCYCLECOUNTER:
3084+
case ISD::READFIXEDTIMER:
30833085
// If the target didn't expand this, just return 'zero' and preserve the
30843086
// chain.
30853087
Results.append(Node->getNumValues() - 1,

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,7 +2648,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
26482648
case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
26492649
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
26502650
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
2651-
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
2651+
case ISD::READCYCLECOUNTER:
2652+
case ISD::READFIXEDTIMER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break;
26522653
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
26532654
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
26542655
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -4026,8 +4027,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
40264027
Lo, Hi);
40274028
}
40284029

4029-
void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
4030-
SDValue &Hi) {
4030+
void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
4031+
SDValue &Hi) {
40314032
SDLoc DL(N);
40324033
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
40334034
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
439439
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
440440
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
441441
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
442-
void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
442+
void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
443443
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
444444
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
445445
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6781,6 +6781,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
67816781
DAG.setRoot(Res.getValue(1));
67826782
return;
67836783
}
6784+
case Intrinsic::readfixedtimer: {
6785+
SDValue Op = getRoot();
6786+
Res = DAG.getNode(ISD::READFIXEDTIMER, sdl,
6787+
DAG.getVTList(MVT::i64, MVT::Other), Op);
6788+
setValue(&I, Res);
6789+
DAG.setRoot(Res.getValue(1));
6790+
return;
6791+
}
67846792
case Intrinsic::bitreverse:
67856793
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
67866794
getValue(I.getArgOperand(0)).getValueType(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
104104
case ISD::ATOMIC_STORE: return "AtomicStore";
105105
case ISD::PCMARKER: return "PCMarker";
106106
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
107+
case ISD::READFIXEDTIMER: return "ReadFixedTimer";
107108
case ISD::SRCVALUE: return "SrcValue";
108109
case ISD::MDNODE_SDNODE: return "MDNode";
109110
case ISD::EntryToken: return "EntryToken";

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,9 @@ void TargetLoweringBase::initActions() {
964964
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
965965
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
966966

967+
// Most targets also ignore the @llvm.readfixedtimer intrinsic.
968+
setOperationAction(ISD::READFIXEDTIMER, MVT::i64, Expand);
969+
967970
// ConstantFP nodes default to expand. Targets can either change this to
968971
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
969972
// to optimize expansions for certain constants.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1957,6 +1957,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19571957
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
19581958
.legalFor({S64});
19591959

1960+
getActionDefinitionsBuilder(G_READFIXEDTIMER).legalFor({S64});
1961+
19601962
getActionDefinitionsBuilder(G_FENCE)
19611963
.alwaysLegal();
19621964

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4051,6 +4051,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
40514051
case AMDGPU::G_CONSTANT:
40524052
case AMDGPU::G_GLOBAL_VALUE:
40534053
case AMDGPU::G_BLOCK_ADDR:
4054+
case AMDGPU::G_READFIXEDTIMER:
40544055
case AMDGPU::G_READCYCLECOUNTER: {
40554056
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
40564057
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
468468

469469
// On SI this is s_memtime and s_memrealtime on VI.
470470
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
471+
472+
if (Subtarget->hasSMemRealTime() ||
473+
Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11)
474+
setOperationAction(ISD::READFIXEDTIMER, MVT::i64, Legal);
471475
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
472476

473477
if (Subtarget->has16BitInsts()) {

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,20 @@ def : GCNPat <
10681068
}
10691069
} // let OtherPredicates = [HasShaderCyclesRegister]
10701070

1071+
let OtherPredicates = [HasSMemRealTime] in {
1072+
def : GCNPat <
1073+
(i64 (readfixedtimer)),
1074+
(S_MEMREALTIME)
1075+
>;
1076+
} // let OtherPredicates = [HasSMemRealTime]
1077+
1078+
let SubtargetPredicate = isGFX11Plus in {
1079+
def : GCNPat <
1080+
(i64 (readfixedtimer)),
1081+
(S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
1082+
>;
1083+
} // let SubtargetPredicate = isGFX11Plus
1084+
10711085
def i32imm_zero : TImmLeaf <i32, [{
10721086
return Imm == 0;
10731087
}]>;

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -489,6 +489,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
489489
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f16, Expand);
490490
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
491491

492+
if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
493+
setOperationAction(ISD::READFIXEDTIMER, MVT::i64, Legal);
494+
492495
setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
493496
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
494497

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3759,7 +3759,6 @@ def CALL_PROTOTYPE :
37593759

37603760
include "NVPTXIntrinsics.td"
37613761

3762-
37633762
//-----------------------------------
37643763
// Notes
37653764
//-----------------------------------

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6372,12 +6372,16 @@ def INT_PTX_SREG_LANEMASK_GE :
63726372
def INT_PTX_SREG_LANEMASK_GT :
63736373
PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>;
63746374

6375+
let hasSideEffects = 1 in {
63756376
def INT_PTX_SREG_CLOCK :
63766377
PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>;
63776378
def INT_PTX_SREG_CLOCK64 :
63786379
PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>;
63796380
def INT_PTX_SREG_GLOBALTIMER :
63806381
PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>;
6382+
}
6383+
6384+
def: Pat <(i64 (readfixedtimer)), (INT_PTX_SREG_GLOBALTIMER)>;
63816385

63826386
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
63836387
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX700
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
6+
7+
declare i64 @llvm.readfixedtimer() #0
8+
9+
; GCN-LABEL: {{^}}test_readfixedtimer:
10+
; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
11+
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
12+
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
13+
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
14+
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
15+
define amdgpu_kernel void @test_readfixedtimer(ptr addrspace(1) %out) #0 {
16+
%cycle0 = call i64 @llvm.readfixedtimer()
17+
store volatile i64 %cycle0, ptr addrspace(1) %out
18+
19+
%cycle1 = call i64 @llvm.readfixedtimer()
20+
store volatile i64 %cycle1, ptr addrspace(1) %out
21+
ret void
22+
}
23+
24+
attributes #0 = { nounwind }

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,17 @@ define i64 @test_globaltimer() {
151151
ret i64 %ret
152152
}
153153

154+
; CHECK-LABEL: test_fixedtimer
155+
define i64 @test_fixedtimer() {
156+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
157+
%a = tail call i64 @llvm.readfixedtimer()
158+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
159+
%b = tail call i64 @llvm.readfixedtimer()
160+
%ret = add i64 %a, %b
161+
; CHECK: ret
162+
ret i64 %ret
163+
}
164+
154165
declare float @llvm.fabs.f32(float)
155166
declare double @llvm.fabs.f64(double)
156167
declare float @llvm.nvvm.sqrt.f(float)
@@ -161,6 +172,7 @@ declare i16 @llvm.ctpop.i16(i16)
161172
declare i32 @llvm.ctpop.i32(i32)
162173
declare i64 @llvm.ctpop.i64(i64)
163174

175+
declare i64 @llvm.readfixedtimer()
164176
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
165177
declare i32 @llvm.nvvm.read.ptx.sreg.clock()
166178
declare i64 @llvm.nvvm.read.ptx.sreg.clock64()

0 commit comments

Comments
 (0)