Skip to content

Commit 11fcae6

Browse files
authored
[LLVM] Add __builtin_readsteadycounter intrinsic and builtin for realtime clocks (#81331)
Summary: This patch adds a new intrinsic and builtin function mirroring the existing `__builtin_readcyclecounter`. The difference is that this implementation targets a separate counter, available on some targets, that returns a fixed frequency clock which can be used to determine elapsed time. This differs from the cycle counter, which often has a variable frequency. This patch only adds support for the NVPTX and AMDGPU targets. This is done as a new and separate builtin rather than an argument to `readcyclecounter` to avoid needing to change existing code and to make the separation more explicit.
1 parent 381a00d commit 11fcae6

35 files changed

+229
-72
lines changed

clang/docs/LanguageExtensions.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2764,6 +2764,39 @@ Query for this feature with ``__has_builtin(__builtin_readcyclecounter)``. Note
27642764
that even if present, its use may depend on run-time privilege or other OS
27652765
controlled state.
27662766
2767+
``__builtin_readsteadycounter``
2768+
------------------------------
2769+
2770+
``__builtin_readsteadycounter`` is used to access the fixed frequency counter
2771+
register (or a similar steady-rate clock) on those targets that support it.
2772+
The function is similar to ``__builtin_readcyclecounter`` above except that the
2773+
frequency is fixed, making it suitable for measuring elapsed time.
2774+
2775+
**Syntax**:
2776+
2777+
.. code-block:: c++
2778+
2779+
__builtin_readsteadycounter()
2780+
2781+
**Example of Use**:
2782+
2783+
.. code-block:: c++
2784+
2785+
unsigned long long t0 = __builtin_readsteadycounter();
2786+
do_something();
2787+
unsigned long long t1 = __builtin_readsteadycounter();
2788+
unsigned long long secs_to_do_something = (t1 - t0) / tick_rate;
2789+
2790+
**Description**:
2791+
2792+
The ``__builtin_readsteadycounter()`` builtin returns the value of the fixed frequency counter.
2793+
When not supported by the target, the return value is always zero. This builtin
2794+
takes no arguments and produces an unsigned long long result. The builtin does
2795+
not guarantee any particular frequency, only that it is stable. Knowledge of the
2796+
counter's true frequency will need to be provided by the user.
2797+
2798+
Query for this feature with ``__has_builtin(__builtin_readsteadycounter)``.
2799+
27672800
``__builtin_dump_struct``
27682801
-------------------------
27692802

clang/docs/ReleaseNotes.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ C23 Feature Support
117117
Non-comprehensive list of changes in this release
118118
-------------------------------------------------
119119

120+
- Added ``__builtin_readsteadycounter`` for reading fixed frequency hardware
121+
counters.
122+
120123
New Compiler Flags
121124
------------------
122125

clang/include/clang/Basic/Builtins.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,6 +1110,12 @@ def ReadCycleCounter : Builtin {
11101110
let Prototype = "unsigned long long int()";
11111111
}
11121112

1113+
def ReadSteadyCounter : Builtin {
1114+
let Spellings = ["__builtin_readsteadycounter"];
1115+
let Attributes = [NoThrow];
1116+
let Prototype = "unsigned long long int()";
1117+
}
1118+
11131119
def Trap : Builtin {
11141120
let Spellings = ["__builtin_trap"];
11151121
let Attributes = [NoThrow, NoReturn];

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3443,6 +3443,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
34433443
Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
34443444
return RValue::get(Builder.CreateCall(F));
34453445
}
3446+
case Builtin::BI__builtin_readsteadycounter: {
3447+
Function *F = CGM.getIntrinsic(Intrinsic::readsteadycounter);
3448+
return RValue::get(Builder.CreateCall(F));
3449+
}
34463450
case Builtin::BI__builtin___clear_cache: {
34473451
Value *Begin = EmitScalarExpr(E->getArg(0));
34483452
Value *End = EmitScalarExpr(E->getArg(1));

clang/test/CodeGen/builtins.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,12 @@ long long test_builtin_readcyclecounter(void) {
496496
return __builtin_readcyclecounter();
497497
}
498498

499+
// CHECK-LABEL: define{{.*}} i64 @test_builtin_readsteadycounter
500+
long long test_builtin_readsteadycounter(void) {
501+
// CHECK: call i64 @llvm.readsteadycounter()
502+
return __builtin_readsteadycounter();
503+
}
504+
499505
/// __builtin_launder should be a NOP in C since there are no vtables.
500506
// CHECK-LABEL: define{{.*}} void @test_builtin_launder
501507
void test_builtin_launder(int *p) {

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,6 +1179,12 @@ enum NodeType {
11791179
/// counter-like register (or other high accuracy low latency clock source).
11801180
READCYCLECOUNTER,
11811181

1182+
/// READSTEADYCOUNTER - This corresponds to the readsteadycounter intrinsic.
1183+
/// It has the same semantics as the READCYCLECOUNTER implementation except
1184+
/// that the result is the content of the architecture-specific fixed
1185+
/// frequency counter suitable for measuring elapsed time.
1186+
READSTEADYCOUNTER,
1187+
11821188
/// HANDLENODE node - Used as a handle for various purposes.
11831189
HANDLENODE,
11841190

llvm/include/llvm/IR/Intrinsics.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -870,6 +870,8 @@ def int_pcmarker : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
870870

871871
def int_readcyclecounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
872872

873+
def int_readsteadycounter : DefaultAttrsIntrinsic<[llvm_i64_ty]>;
874+
873875
// The assume intrinsic is marked InaccessibleMemOnly so that proper control
874876
// dependencies will be maintained.
875877
def int_assume : DefaultAttrsIntrinsic<

llvm/include/llvm/Support/TargetOpcodes.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,9 @@ HANDLE_TARGET_OPCODE(G_INTRINSIC_ROUNDEVEN)
352352
/// INTRINSIC readcyclecounter
353353
HANDLE_TARGET_OPCODE(G_READCYCLECOUNTER)
354354

355+
/// INTRINSIC readsteadycounter
356+
HANDLE_TARGET_OPCODE(G_READSTEADYCOUNTER)
357+
355358
/// Generic load (including anyext load)
356359
HANDLE_TARGET_OPCODE(G_LOAD)
357360

llvm/include/llvm/Target/GenericOpcodes.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,12 @@ def G_READCYCLECOUNTER : GenericInstruction {
11011101
let hasSideEffects = true;
11021102
}
11031103

1104+
def G_READSTEADYCOUNTER : GenericInstruction {
1105+
let OutOperandList = (outs type0:$dst);
1106+
let InOperandList = (ins);
1107+
let hasSideEffects = true;
1108+
}
1109+
11041110
//------------------------------------------------------------------------------
11051111
// Memory ops
11061112
//------------------------------------------------------------------------------

llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ def : GINodeEquiv<G_FMAXNUM_IEEE, fmaxnum_ieee>;
168168
def : GINodeEquiv<G_FMAXIMUM, fmaximum>;
169169
def : GINodeEquiv<G_FMINIMUM, fminimum>;
170170
def : GINodeEquiv<G_READCYCLECOUNTER, readcyclecounter>;
171+
def : GINodeEquiv<G_READSTEADYCOUNTER, readsteadycounter>;
171172
def : GINodeEquiv<G_ROTR, rotr>;
172173
def : GINodeEquiv<G_ROTL, rotl>;
173174
def : GINodeEquiv<G_LROUND, lround>;

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,9 @@ def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch,
657657
def readcyclecounter : SDNode<"ISD::READCYCLECOUNTER", SDTIntLeaf,
658658
[SDNPHasChain, SDNPSideEffect]>;
659659

660+
def readsteadycounter : SDNode<"ISD::READSTEADYCOUNTER", SDTIntLeaf,
661+
[SDNPHasChain, SDNPSideEffect]>;
662+
660663
def membarrier : SDNode<"ISD::MEMBARRIER", SDTNone,
661664
[SDNPHasChain, SDNPSideEffect]>;
662665

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,6 +1885,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
18851885
return TargetOpcode::G_INTRINSIC_TRUNC;
18861886
case Intrinsic::readcyclecounter:
18871887
return TargetOpcode::G_READCYCLECOUNTER;
1888+
case Intrinsic::readsteadycounter:
1889+
return TargetOpcode::G_READSTEADYCOUNTER;
18881890
case Intrinsic::ptrmask:
18891891
return TargetOpcode::G_PTRMASK;
18901892
case Intrinsic::lrint:

llvm/lib/CodeGen/IntrinsicLowering.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
312312
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
313313
break;
314314
}
315+
case Intrinsic::readsteadycounter: {
316+
errs() << "WARNING: this target does not support the llvm.readsteadycounter"
317+
<< " intrinsic. It is being lowered to a constant 0\n";
318+
CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0));
319+
break;
320+
}
315321

316322
case Intrinsic::dbg_declare:
317323
case Intrinsic::dbg_label:

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1127,8 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
11271127
Action = TargetLowering::Custom;
11281128
break;
11291129
case ISD::READCYCLECOUNTER:
1130-
// READCYCLECOUNTER returns an i64, even if type legalization might have
1131-
// expanded that to several smaller types.
1130+
case ISD::READSTEADYCOUNTER:
1131+
// READCYCLECOUNTER and READSTEADYCOUNTER return an i64, even if type
1132+
// legalization might have expanded that to several smaller types.
11321133
Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64);
11331134
break;
11341135
case ISD::READ_REGISTER:
@@ -3080,6 +3081,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
30803081
Results.push_back(Node->getOperand(0));
30813082
break;
30823083
case ISD::READCYCLECOUNTER:
3084+
case ISD::READSTEADYCOUNTER:
30833085
// If the target didn't expand this, just return 'zero' and preserve the
30843086
// chain.
30853087
Results.append(Node->getNumValues() - 1,

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,7 +2648,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
26482648
case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break;
26492649
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
26502650
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
2651-
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
2651+
case ISD::READCYCLECOUNTER:
2652+
case ISD::READSTEADYCOUNTER: ExpandIntRes_READCOUNTER(N, Lo, Hi); break;
26522653
case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break;
26532654
case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
26542655
case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
@@ -4031,8 +4032,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
40314032
Lo, Hi);
40324033
}
40334034

4034-
void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo,
4035-
SDValue &Hi) {
4035+
void DAGTypeLegalizer::ExpandIntRes_READCOUNTER(SDNode *N, SDValue &Lo,
4036+
SDValue &Hi) {
40364037
SDLoc DL(N);
40374038
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
40384039
SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other);

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
439439
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
440440
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
441441
void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi);
442-
void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
442+
void ExpandIntRes_READCOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi);
443443
void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
444444
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
445445
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6781,6 +6781,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
67816781
DAG.setRoot(Res.getValue(1));
67826782
return;
67836783
}
6784+
case Intrinsic::readsteadycounter: {
6785+
SDValue Op = getRoot();
6786+
Res = DAG.getNode(ISD::READSTEADYCOUNTER, sdl,
6787+
DAG.getVTList(MVT::i64, MVT::Other), Op);
6788+
setValue(&I, Res);
6789+
DAG.setRoot(Res.getValue(1));
6790+
return;
6791+
}
67846792
case Intrinsic::bitreverse:
67856793
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
67866794
getValue(I.getArgOperand(0)).getValueType(),

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
104104
case ISD::ATOMIC_STORE: return "AtomicStore";
105105
case ISD::PCMARKER: return "PCMarker";
106106
case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
107+
case ISD::READSTEADYCOUNTER: return "ReadSteadyCounter";
107108
case ISD::SRCVALUE: return "SrcValue";
108109
case ISD::MDNODE_SDNODE: return "MDNode";
109110
case ISD::EntryToken: return "EntryToken";

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -964,6 +964,9 @@ void TargetLoweringBase::initActions() {
964964
// Most targets also ignore the @llvm.readcyclecounter intrinsic.
965965
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand);
966966

967+
// Most targets also ignore the @llvm.readsteadycounter intrinsic.
968+
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand);
969+
967970
// ConstantFP nodes default to expand. Targets can either change this to
968971
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
969972
// to optimize expansions for certain constants.

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1988,6 +1988,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
19881988
getActionDefinitionsBuilder(G_READCYCLECOUNTER)
19891989
.legalFor({S64});
19901990

1991+
getActionDefinitionsBuilder(G_READSTEADYCOUNTER).legalFor({S64});
1992+
19911993
getActionDefinitionsBuilder(G_FENCE)
19921994
.alwaysLegal();
19931995

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4051,6 +4051,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
40514051
case AMDGPU::G_CONSTANT:
40524052
case AMDGPU::G_GLOBAL_VALUE:
40534053
case AMDGPU::G_BLOCK_ADDR:
4054+
case AMDGPU::G_READSTEADYCOUNTER:
40544055
case AMDGPU::G_READCYCLECOUNTER: {
40554056
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
40564057
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
468468

469469
// On SI this is s_memtime and s_memrealtime on VI.
470470
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
471+
472+
if (Subtarget->hasSMemRealTime() ||
473+
Subtarget->getGeneration() >= AMDGPUSubtarget::GFX11)
474+
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
471475
setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Custom);
472476

473477
if (Subtarget->has16BitInsts()) {

llvm/lib/Target/AMDGPU/SMInstructions.td

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,6 +1068,20 @@ def : GCNPat <
10681068
}
10691069
} // let OtherPredicates = [HasShaderCyclesRegister]
10701070

1071+
let OtherPredicates = [HasSMemRealTime] in {
1072+
def : GCNPat <
1073+
(i64 (readsteadycounter)),
1074+
(S_MEMREALTIME)
1075+
>;
1076+
} // let OtherPredicates = [HasSMemRealTime]
1077+
1078+
let SubtargetPredicate = isGFX11Plus in {
1079+
def : GCNPat <
1080+
(i64 (readsteadycounter)),
1081+
(S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
1082+
>;
1083+
} // let SubtargetPredicate = isGFX11Plus
1084+
10711085
def i32imm_zero : TImmLeaf <i32, [{
10721086
return Imm == 0;
10731087
}]>;

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
490490
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f16, Expand);
491491

492492
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
493+
if (STI.getSmVersion() >= 30 && STI.getPTXVersion() > 31)
494+
setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Legal);
493495

494496
setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
495497
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3805,7 +3805,6 @@ def CALL_PROTOTYPE :
38053805

38063806
include "NVPTXIntrinsics.td"
38073807

3808-
38093808
//-----------------------------------
38103809
// Notes
38113810
//-----------------------------------

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6382,6 +6382,7 @@ def INT_PTX_SREG_GLOBALTIMER :
63826382
}
63836383

63846384
def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>;
6385+
def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>;
63856386

63866387
def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>;
63876388
def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>;

llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@
163163
# DEBUG-NEXT: G_READCYCLECOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
164164
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
165165
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
166+
# DEBUG-NEXT: G_READSTEADYCOUNTER (opcode {{[0-9]+}}): 1 type index, 0 imm indices
167+
# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
168+
# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
166169

167170
# DEBUG-NEXT: G_LOAD (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
168171
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX700
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX900
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
6+
7+
declare i64 @llvm.readsteadycounter() #0
8+
9+
; GCN-LABEL: {{^}}test_readsteadycounter:
10+
; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
11+
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
12+
; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
13+
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
14+
; GFX1100: s_sendmsg_rtn_b64 s[[[LO:[0-9]+]]:[[HI:[0-9]+]]], sendmsg(MSG_RTN_GET_REALTIME)
15+
define amdgpu_kernel void @test_readsteadycounter(ptr addrspace(1) %out) #0 {
16+
%cycle0 = call i64 @llvm.readsteadycounter()
17+
store volatile i64 %cycle0, ptr addrspace(1) %out
18+
19+
%cycle1 = call i64 @llvm.readsteadycounter()
20+
store volatile i64 %cycle1, ptr addrspace(1) %out
21+
ret void
22+
}
23+
24+
attributes #0 = { nounwind }

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,17 @@ define i64 @test_cyclecounter() {
162162
ret i64 %ret
163163
}
164164

165+
; CHECK-LABEL: test_steadycounter
166+
define i64 @test_steadycounter() {
167+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
168+
%a = tail call i64 @llvm.readsteadycounter()
169+
; CHECK: mov.u64 %r{{.*}}, %globaltimer;
170+
%b = tail call i64 @llvm.readsteadycounter()
171+
%ret = add i64 %a, %b
172+
; CHECK: ret
173+
ret i64 %ret
174+
}
175+
165176
declare float @llvm.fabs.f32(float)
166177
declare double @llvm.fabs.f64(double)
167178
declare float @llvm.nvvm.sqrt.f(float)
@@ -178,3 +189,4 @@ declare i64 @llvm.nvvm.read.ptx.sreg.clock64()
178189
declare void @llvm.nvvm.exit()
179190
declare i64 @llvm.nvvm.read.ptx.sreg.globaltimer()
180191
declare i64 @llvm.readcyclecounter()
192+
declare i64 @llvm.readsteadycounter()

0 commit comments

Comments
 (0)