@@ -128,6 +128,15 @@ bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
128
128
return true ;
129
129
}
130
130
131
+ static std::optional<unsigned >
132
+ findOneNVVMAnnotation (const GlobalValue &GV, const std::string &PropName) {
133
+ unsigned RetVal;
134
+ bool Found = findOneNVVMAnnotation (&GV, PropName, RetVal);
135
+ if (Found)
136
+ return RetVal;
137
+ return std::nullopt;
138
+ }
139
+
131
140
bool findAllNVVMAnnotation (const GlobalValue *gv, const std::string &prop,
132
141
std::vector<unsigned > &retval) {
133
142
auto &AC = getAnnotationCache ();
@@ -252,32 +261,57 @@ std::string getSamplerName(const Value &val) {
252
261
return std::string (val.getName ());
253
262
}
254
263
255
- bool getMaxNTIDx (const Function &F, unsigned &x ) {
256
- return findOneNVVMAnnotation (& F, " maxntidx" , x );
264
+ std::optional< unsigned > getMaxNTIDx (const Function &F) {
265
+ return findOneNVVMAnnotation (F, " maxntidx" );
257
266
}
258
267
259
- bool getMaxNTIDy (const Function &F, unsigned &y ) {
260
- return findOneNVVMAnnotation (& F, " maxntidy" , y );
268
+ std::optional< unsigned > getMaxNTIDy (const Function &F) {
269
+ return findOneNVVMAnnotation (F, " maxntidy" );
261
270
}
262
271
263
- bool getMaxNTIDz (const Function &F, unsigned &z) {
264
- return findOneNVVMAnnotation (&F, " maxntidz" , z);
272
+ std::optional<unsigned > getMaxNTIDz (const Function &F) {
273
+ return findOneNVVMAnnotation (F, " maxntidz" );
274
+ }
275
+
276
+ std::optional<unsigned > getMaxNTID (const Function &F) {
277
+ // Note: The semantics here are a bit strange. The PTX ISA states the
278
+ // following (11.4.2. Performance-Tuning Directives: .maxntid):
279
+ //
280
+ // Note that this directive guarantees that the total number of threads does
281
+ // not exceed the maximum, but does not guarantee that the limit in any
282
+ // particular dimension is not exceeded.
283
+ std::optional<unsigned > MaxNTIDx = getMaxNTIDx (F);
284
+ std::optional<unsigned > MaxNTIDy = getMaxNTIDy (F);
285
+ std::optional<unsigned > MaxNTIDz = getMaxNTIDz (F);
286
+ if (MaxNTIDx || MaxNTIDy || MaxNTIDz)
287
+ return MaxNTIDx.value_or (1 ) * MaxNTIDy.value_or (1 ) * MaxNTIDz.value_or (1 );
288
+ return std::nullopt;
265
289
}
266
290
267
291
bool getMaxClusterRank (const Function &F, unsigned &x) {
268
292
return findOneNVVMAnnotation (&F, " maxclusterrank" , x);
269
293
}
270
294
271
- bool getReqNTIDx (const Function &F, unsigned &x) {
272
- return findOneNVVMAnnotation (&F, " reqntidx" , x);
295
+ std::optional<unsigned > getReqNTIDx (const Function &F) {
296
+ return findOneNVVMAnnotation (F, " reqntidx" );
297
+ }
298
+
299
+ std::optional<unsigned > getReqNTIDy (const Function &F) {
300
+ return findOneNVVMAnnotation (F, " reqntidy" );
273
301
}
274
302
275
- bool getReqNTIDy (const Function &F, unsigned &y ) {
276
- return findOneNVVMAnnotation (& F, " reqntidy " , y );
303
+ std::optional< unsigned > getReqNTIDz (const Function &F) {
304
+ return findOneNVVMAnnotation (F, " reqntidz " );
277
305
}
278
306
279
- bool getReqNTIDz (const Function &F, unsigned &z) {
280
- return findOneNVVMAnnotation (&F, " reqntidz" , z);
307
+ std::optional<unsigned > getReqNTID (const Function &F) {
308
+ // Note: The semantics here are a bit strange. See getMaxNTID.
309
+ std::optional<unsigned > ReqNTIDx = getReqNTIDx (F);
310
+ std::optional<unsigned > ReqNTIDy = getReqNTIDy (F);
311
+ std::optional<unsigned > ReqNTIDz = getReqNTIDz (F);
312
+ if (ReqNTIDx || ReqNTIDy || ReqNTIDz)
313
+ return ReqNTIDx.value_or (1 ) * ReqNTIDy.value_or (1 ) * ReqNTIDz.value_or (1 );
314
+ return std::nullopt;
281
315
}
282
316
283
317
bool getMinCTASm (const Function &F, unsigned &x) {
0 commit comments