|
13 | 13 | #include "stats.h"
|
14 | 14 | #include "utils.h"
|
15 | 15 |
|
16 |
| -void reg_count() { |
17 |
| - const uint32_t NREG_MIN = 1; |
18 |
| - const uint32_t NREG_MAX = 512; |
19 |
| - const uint32_t NREG_STEP = 1; |
20 |
| - |
21 |
| - const double COMPENSATE = 0.01; |
22 |
| - const double THRESHOLD = 3; |
23 |
| - |
24 |
| - const uint32_t NGRP_MIN = 1; |
25 |
| - const uint32_t NGRP_MAX = 64; |
26 |
| - const uint32_t NGRP_STEP = 1; |
27 |
| - |
28 |
| - uint32_t NITER; |
29 |
| - |
30 |
| - auto bench = [&](uint32_t ngrp, uint32_t nreg) { |
31 |
| - size_t len = sizeof(float); |
32 |
| - StorageBuffer buffer(context(), vkapi::kFloat, len); |
33 |
| - ParamsBuffer params(context(), int32_t(len)); |
34 |
| - vkapi::PipelineBarrier pipeline_barrier{}; |
35 |
| - |
36 |
| - auto shader_name = "reg_count_" + std::to_string(nreg); |
37 |
| - |
38 |
| - auto time = benchmark_on_gpu(shader_name, NITER, [&]() { |
39 |
| - context()->submit_compute_job( |
40 |
| - VK_KERNEL_FROM_STR(shader_name), |
41 |
| - pipeline_barrier, |
42 |
| - {1, ngrp, 1}, |
43 |
| - {1, 1, 1}, |
44 |
| - {SV(NITER)}, |
45 |
| - VK_NULL_HANDLE, |
46 |
| - 0, |
47 |
| - buffer.buffer(), |
48 |
| - params.buffer()); |
49 |
| - }); |
50 |
| - return time; |
51 |
| - }; |
52 |
| - |
53 |
| - std::cout << "Calculating NITER..." << std::endl; |
54 |
| - ensure_min_niter(1000, NITER, [&]() { return bench(1, NREG_MIN); }); |
55 |
| - std::cout << "NITER," << NITER << std::endl; |
56 |
| - |
57 |
| - uint32_t nreg_max; |
58 |
| - |
59 |
| - DtJumpFinder<5> dj(COMPENSATE, THRESHOLD); |
60 |
| - uint32_t nreg = NREG_MIN; |
61 |
| - for (; nreg <= NREG_MAX; nreg += NREG_STEP) { |
62 |
| - double time = bench(1, nreg); |
63 |
| - std::cout << "Testing nreg=\t" << nreg << "\tTime=\t" << time << std::endl; |
64 |
| - if (dj.push(time)) { |
65 |
| - nreg -= NREG_STEP; |
66 |
| - nreg_max = nreg; |
67 |
| - break; |
| 16 | +using namespace vkapi; |
| 17 | + |
| 18 | +class App { |
| 19 | + private: |
| 20 | + VkPhysicalDevice device_handle_; |
| 21 | + uint32_t sm_count_; |
| 22 | + |
| 23 | + int get_sm_count() { |
| 24 | + if (!context()->adapter_ptr()->has_sm_builtins()) { |
| 25 | + return 0; |
68 | 26 | }
|
| 27 | + |
| 28 | + VkPhysicalDeviceShaderSMBuiltinsPropertiesNV smBuiltinProperties = { |
| 29 | + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_PROPERTIES_NV}; |
| 30 | + |
| 31 | + VkPhysicalDeviceProperties2 deviceProperties = { |
| 32 | + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2}; |
| 33 | + deviceProperties.pNext = &smBuiltinProperties; |
| 34 | + vkGetPhysicalDeviceProperties2(device_handle_, &deviceProperties); |
| 35 | + |
| 36 | + return smBuiltinProperties.shaderSMCount; |
69 | 37 | }
|
70 |
| - if (nreg >= NREG_MAX) { |
71 |
| - std::cout << "Unable to conclude a maximal register count" << std::endl; |
72 |
| - nreg_max = NREG_STEP; |
73 |
| - } else { |
74 |
| - std::cout << nreg_max << " registers are available at most" << std::endl; |
| 38 | + |
| 39 | + public: |
| 40 | + bool initialize(uint32_t sm_count) { |
| 41 | + device_handle_ = context()->adapter_ptr()->physical_handle(); |
| 42 | + context()->initialize_querypool(); |
| 43 | + |
| 44 | + std::cout << context()->adapter_ptr()->stringize() << "\n\n"; |
| 45 | + |
| 46 | + sm_count_ = get_sm_count(); |
| 47 | + |
| 48 | + if (sm_count_ > 0) { |
| 49 | + } else if (sm_count > 0) { |
| 50 | + sm_count_ = sm_count; |
| 51 | + } else { |
| 52 | + std::cout |
| 53 | + << "SM counter is not available, please specify the SM count as an argument of this binary." |
| 54 | + << std::endl; |
| 55 | + return false; |
| 56 | + } |
| 57 | + |
| 58 | + std::cout << "\nSM count," << sm_count_ << std::endl; |
| 59 | + return true; |
75 | 60 | }
|
76 | 61 |
|
77 |
| - auto find_ngrp_by_nreg = [&](const uint32_t nreg) { |
78 |
| - DtJumpFinder<5> dj(COMPENSATE, THRESHOLD); |
79 |
| - for (auto ngrp = NGRP_MIN; ngrp <= NGRP_MAX; ngrp += NGRP_STEP) { |
80 |
| - auto time = bench(ngrp, nreg); |
81 |
| - std::cout << "Testing occupation (nreg=" << nreg << "); ngrp=" << ngrp |
82 |
| - << ", time=" << time << " us" << std::endl; |
| 62 | + void reg_count() { |
| 63 | + std::cout << "\n------ Register Count ------" << std::endl; |
| 64 | + const uint32_t NREG_MIN = 1; |
| 65 | + const uint32_t NREG_MAX = 512; |
| 66 | + const uint32_t NREG_STEP = 1; |
| 67 | + |
| 68 | + const double COMPENSATE = 0.01; |
| 69 | + const double THRESHOLD = 3; |
| 70 | + |
| 71 | + const uint32_t NGRP_MIN = 1; |
| 72 | + const uint32_t NGRP_MAX = 64; |
| 73 | + const uint32_t NGRP_STEP = 1; |
| 74 | + |
| 75 | + uint32_t NITER; |
| 76 | + |
| 77 | + auto bench = [&](uint32_t ngrp, uint32_t nreg) { |
| 78 | + size_t len = sizeof(float); |
| 79 | + StorageBuffer buffer(context(), vkapi::kFloat, len); |
| 80 | + ParamsBuffer params(context(), int32_t(len)); |
| 81 | + vkapi::PipelineBarrier pipeline_barrier{}; |
| 82 | + |
| 83 | + auto shader_name = "reg_count_" + std::to_string(nreg); |
| 84 | + |
| 85 | + auto time = benchmark_on_gpu(shader_name, NITER, [&]() { |
| 86 | + context()->submit_compute_job( |
| 87 | + VK_KERNEL_FROM_STR(shader_name), |
| 88 | + pipeline_barrier, |
| 89 | + {1, ngrp, 1}, |
| 90 | + {1, 1, 1}, |
| 91 | + {SV(NITER)}, |
| 92 | + VK_NULL_HANDLE, |
| 93 | + 0, |
| 94 | + buffer.buffer(), |
| 95 | + params.buffer()); |
| 96 | + }); |
| 97 | + return time; |
| 98 | + }; |
| 99 | + |
| 100 | + std::cout << "Calculating NITER..." << std::endl; |
| 101 | + ensure_min_niter(1000, NITER, [&]() { return bench(1, NREG_MIN); }); |
| 102 | + std::cout << "NITER," << NITER << std::endl; |
| 103 | + |
| 104 | + uint32_t nreg_max; |
83 | 105 |
|
| 106 | + DtJumpFinder<5> dj(COMPENSATE, THRESHOLD); |
| 107 | + uint32_t nreg = NREG_MIN; |
| 108 | + for (; nreg <= NREG_MAX; nreg += NREG_STEP) { |
| 109 | + double time = bench(1, nreg); |
| 110 | + std::cout << "Testing nreg=\t" << nreg << "\tTime=\t" << time |
| 111 | + << std::endl; |
84 | 112 | if (dj.push(time)) {
|
85 |
| - ngrp -= NGRP_STEP; |
86 |
| - std::cout << "Using " << nreg << " registers can have " << ngrp |
87 |
| - << " concurrent single-thread workgroups" << std::endl; |
88 |
| - return ngrp; |
| 113 | + nreg -= NREG_STEP; |
| 114 | + nreg_max = nreg; |
| 115 | + break; |
89 | 116 | }
|
90 | 117 | }
|
91 |
| - std::cout |
92 |
| - << "Unable to conclude a maximum number of concurrent single-thread workgroups when " |
93 |
| - << nreg << " registers are occupied" << std::endl; |
94 |
| - return (uint32_t)1; |
95 |
| - }; |
96 |
| - |
97 |
| - uint32_t ngrp_full, ngrp_half; |
98 |
| - ngrp_full = find_ngrp_by_nreg(nreg_max); |
99 |
| - ngrp_half = find_ngrp_by_nreg(nreg_max / 2); |
100 |
| - |
101 |
| - std::string reg_ty; |
| 118 | + if (nreg >= NREG_MAX) { |
| 119 | + std::cout << "Unable to conclude a maximal register count" << std::endl; |
| 120 | + nreg_max = NREG_STEP; |
| 121 | + } else { |
| 122 | + std::cout << nreg_max << " registers are available at most" << std::endl; |
| 123 | + } |
102 | 124 |
|
103 |
| - if (ngrp_full * 1.5 < ngrp_half) { |
104 |
| - std::cout << "All physical threads in an sm share " << nreg_max |
105 |
| - << " registers" << std::endl; |
106 |
| - reg_ty = "Pooled"; |
| 125 | + auto find_ngrp_by_nreg = [&](const uint32_t nreg) { |
| 126 | + DtJumpFinder<5> dj(COMPENSATE, THRESHOLD); |
| 127 | + for (auto ngrp = NGRP_MIN; ngrp <= NGRP_MAX; ngrp += NGRP_STEP) { |
| 128 | + auto time = bench(ngrp, nreg); |
| 129 | + std::cout << "Testing occupation (nreg=" << nreg << "); ngrp=" << ngrp |
| 130 | + << ", time=" << time << " us" << std::endl; |
| 131 | + |
| 132 | + if (dj.push(time)) { |
| 133 | + ngrp -= NGRP_STEP; |
| 134 | + std::cout << "Using " << nreg << " registers can have " << ngrp |
| 135 | + << " concurrent single-thread workgroups" << std::endl; |
| 136 | + return ngrp; |
| 137 | + } |
| 138 | + } |
| 139 | + std::cout |
| 140 | + << "Unable to conclude a maximum number of concurrent single-thread workgroups when " |
| 141 | + << nreg << " registers are occupied" << std::endl; |
| 142 | + return (uint32_t)1; |
| 143 | + }; |
| 144 | + |
| 145 | + uint32_t ngrp_full, ngrp_half; |
| 146 | + ngrp_full = find_ngrp_by_nreg(nreg_max); |
| 147 | + ngrp_half = find_ngrp_by_nreg(nreg_max / 2); |
| 148 | + |
| 149 | + std::string reg_ty; |
| 150 | + |
| 151 | + if (ngrp_full * 1.5 < ngrp_half) { |
| 152 | + std::cout << "All physical threads in an sm share " << nreg_max |
| 153 | + << " registers" << std::endl; |
| 154 | + reg_ty = "Pooled"; |
| 155 | + |
| 156 | + } else { |
| 157 | + std::cout << "Each physical thread has " << nreg_max << " registers" |
| 158 | + << std::endl; |
| 159 | + reg_ty = "Dedicated"; |
| 160 | + } |
107 | 161 |
|
108 |
| - } else { |
109 |
| - std::cout << "Each physical thread has " << nreg_max << " registers" |
| 162 | + std::cout << "\n\nNITER," << NITER << std::endl; |
| 163 | + std::cout << "Max registers," << nreg_max << std::endl; |
| 164 | + std::cout << "Concurrent full single thread workgroups," << ngrp_full |
| 165 | + << std::endl; |
| 166 | + std::cout << "Concurrent half single thread workgroups," << ngrp_half |
110 | 167 | << std::endl;
|
111 |
| - reg_ty = "Dedicated"; |
| 168 | + std::cout << "Register type," << reg_ty << std::endl; |
112 | 169 | }
|
113 |
| - |
114 |
| - std::cout << "\n\nNITER," << NITER << std::endl; |
115 |
| - std::cout << "Max registers," << nreg_max << std::endl; |
116 |
| - std::cout << "Concurrent full single thread workgroups," << ngrp_full |
117 |
| - << std::endl; |
118 |
| - std::cout << "Concurrent half single thread workgroups," << ngrp_half |
119 |
| - << std::endl; |
120 |
| - std::cout << "Register type," << reg_ty << std::endl; |
121 |
| -} |
| 170 | +}; |
122 | 171 |
|
123 | 172 | int main(int argc, const char** argv) {
|
124 |
| - context()->initialize_querypool(); |
| 173 | + App app; |
| 174 | + |
| 175 | + int32_t sm_count = 0; |
| 176 | + |
| 177 | + if (argc == 2) { |
| 178 | + sm_count = atoi(argv[1]); |
| 179 | + if (sm_count <= 0) { |
| 180 | + std::cout << "Invalid SM count" << std::endl; |
| 181 | + return 1; |
| 182 | + } |
| 183 | + } else if (argc > 2) { |
| 184 | + std::cout << "Usage: vulkan_gpuinfo [sm count]" << std::endl; |
| 185 | + return 1; |
| 186 | + } |
125 | 187 |
|
126 |
| - reg_count(); |
| 188 | + if (!app.initialize(sm_count)) { |
| 189 | + return 1; |
| 190 | + } |
127 | 191 |
|
| 192 | + app.reg_count(); |
128 | 193 | return 0;
|
129 | 194 | }
|
0 commit comments