@@ -20,6 +20,10 @@ void reg_count() {
20
20
// Passed to the DtJumpFinder constructor in find_ngrp_by_nreg.
// NOTE(review): their exact meaning is defined by DtJumpFinder, which is not
// visible in this excerpt — confirm there.
const double COMPENSATE = 0.01 ;
21
21
const double THRESHOLD = 3 ;
22
22
23
// Inclusive bounds and increment of the workgroup-count sweep performed by
// find_ngrp_by_nreg: ngrp starts at NGRP_MIN and advances by NGRP_STEP while
// it is <= NGRP_MAX.
+ const uint32_t NGRP_MIN = 1 ;
24
+ const uint32_t NGRP_MAX = 64 ;
25
+ const uint32_t NGRP_STEP = 1 ;
26
+
23
27
// Declared without an initializer.
// NOTE(review): where it is assigned and read is outside this excerpt.
uint32_t NITER;
24
28
25
29
auto bench = [&](uint32_t nthread, uint32_t ngrp, uint32_t nreg) {
@@ -68,6 +72,41 @@ void reg_count() {
68
72
} else {
69
73
std::cout << nreg_max << " available at most" << std::endl;
70
74
}
75
+
76
+ auto find_ngrp_by_nreg = [&](uint32_t nreg) {
77
+ DtJumpFinder<5 > dj (COMPENSATE, THRESHOLD);
78
+ for (auto ngrp = NGRP_MIN; ngrp <= NGRP_MAX; ngrp += NGRP_STEP) {
79
+ auto time = bench (1 , ngrp, nreg);
80
+ std::cout << " Testing occupation (nreg=" << nreg << " ); ngrp=" << ngrp
81
+ << " , time=" << time << " us" << std::endl;
82
+
83
+ if (dj.push (time)) {
84
+ ngrp -= NGRP_STEP;
85
+ std::cout << " Using " << nreg << " registers can have " << ngrp
86
+ << " concurrent single-thread workgroups" << std::endl;
87
+ return ngrp;
88
+ }
89
+ }
90
+ std::cout
91
+ << " Unable to conclude a maximum number of concurrent single-thread workgroups when "
92
+ << nreg << " registers are occupied" << std::endl;
93
+ return (uint32_t )1 ;
94
+ };
95
+
96
+ uint32_t ngrp_full, ngrp_half;
97
+ ngrp_full = find_ngrp_by_nreg (nreg_max);
98
+ ngrp_half = find_ngrp_by_nreg (nreg_max / 2 );
99
+
100
+ std::string reg_ty;
101
+
102
+ if (ngrp_full * 1.5 < ngrp_half) {
103
+ std::cout << " All physical threads in an sm share " << nreg_max
104
+ << " registers" << std::endl;
105
+
106
+ } else {
107
+ std::cout << " Each physical thread has " << nreg_max << " registers"
108
+ << std::endl;
109
+ }
71
110
}
72
111
73
112
int main (int argc, const char ** argv) {
0 commit comments