@@ -340,43 +340,49 @@ template <class T, int N> struct ImplDec {
340
340
}
341
341
};
342
342
343
+ // The purpose of this is to validate that floating point data is correctly
344
+ // processed.
345
+ constexpr float FPDELTA = 0.5f;
346
+
343
347
template <class T , int N, class C , C Op> struct ImplAdd {
344
348
static constexpr C atomic_op = Op;
345
349
static constexpr int n_args = 1 ;
346
350
347
- static T init (int i, const Config &cfg) { return (T) 0 ; }
351
+ static T init (int i, const Config &cfg) { return 0 ; }
348
352
349
353
static T gold (int i, const Config &cfg) {
350
- T gold = is_updated (i, N, cfg)
351
- ? (T)(cfg. repeat * cfg. threads_per_group * cfg.n_groups )
352
- : init (i, cfg);
354
+ T gold = is_updated (i, N, cfg) ? (T)(cfg. repeat * cfg. threads_per_group *
355
+ cfg.n_groups * (T)( 1 + FPDELTA) )
356
+ : init (i, cfg);
353
357
return gold;
354
358
}
355
359
356
- static T arg0 (int i) { return 1 ; }
360
+ static T arg0 (int i) { return (T)( 1 + FPDELTA) ; }
357
361
};
358
362
359
363
template <class T , int N, class C , C Op> struct ImplSub {
360
364
static constexpr C atomic_op = Op;
361
365
static constexpr int n_args = 1 ;
362
- static constexpr int base = 5 ;
366
+ static constexpr T base = (T)( 5 + FPDELTA) ;
363
367
364
368
static T init (int i, const Config &cfg) {
365
- return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups + base);
369
+ return (T)(cfg.repeat * cfg.threads_per_group * cfg.n_groups *
370
+ (T)(1 + FPDELTA) +
371
+ base);
366
372
}
367
373
368
374
static T gold (int i, const Config &cfg) {
369
- T gold = is_updated (i, N, cfg) ? (T) base : init (i, cfg);
375
+ T gold = is_updated (i, N, cfg) ? base : init (i, cfg);
370
376
return gold;
371
377
}
372
378
373
- static T arg0 (int i) { return 1 ; }
379
+ static T arg0 (int i) { return (T)( 1 + FPDELTA) ; }
374
380
};
375
381
376
382
template <class T , int N, class C , C Op> struct ImplMin {
377
383
static constexpr C atomic_op = Op;
378
384
static constexpr int n_args = 1 ;
379
- static constexpr int MIN = 1 ;
385
+ static constexpr T MIN = (T)( 1 + FPDELTA) ;
380
386
381
387
static T init (int i, const Config &cfg) {
382
388
return (T)(cfg.threads_per_group * cfg.n_groups + MIN + 1 );
@@ -393,18 +399,18 @@ template <class T, int N, class C, C Op> struct ImplMin {
393
399
template <class T , int N, class C , C Op> struct ImplMax {
394
400
static constexpr C atomic_op = Op;
395
401
static constexpr int n_args = 1 ;
396
- static constexpr int base = 5 ;
402
+ static constexpr T base = (T)( 5 + FPDELTA) ;
397
403
398
- static T init (int i, const Config &cfg) { return 0 ; }
404
+ static T init (int i, const Config &cfg) { return (T)FPDELTA ; }
399
405
400
406
static T gold (int i, const Config &cfg) {
401
407
T gold = is_updated (i, N, cfg)
402
- ? (T)(cfg.threads_per_group * cfg.n_groups - 1 )
408
+ ? (T)(cfg.threads_per_group * cfg.n_groups - 1 + FPDELTA )
403
409
: init (i, cfg);
404
410
return gold;
405
411
}
406
412
407
- static T arg0 (int i) { return i ; }
413
+ static T arg0 (int i) { return (T)(i + FPDELTA) ; }
408
414
};
409
415
410
416
template <class T , int N>
@@ -444,7 +450,7 @@ struct ImplLSCFmax : ImplMax<T, N, LSCAtomicOp, LSCAtomicOp::fmax> {};
444
450
template <class T , int N, class C , C Op> struct ImplCmpxchgBase {
445
451
static constexpr C atomic_op = Op;
446
452
static constexpr int n_args = 2 ;
447
- static constexpr int base = 2 ;
453
+ static constexpr T base = (T)( 2 + FPDELTA) ;
448
454
449
455
static T init (int i, const Config &cfg) { return base - 1 ; }
450
456
0 commit comments