Skip to content

Commit 626a31d

Browse files
[libomptarget] Add register usage info to kernel metadata
Add register usage information to the runtime metadata so that it can be used during kernel launch (that change will be in a different commit). Also add this information to the kernel trace.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D98829
1 parent 961e438 commit 626a31d

File tree

3 files changed

+46
-4
lines changed

3 files changed

+46
-4
lines changed

openmp/libomptarget/plugins/amdgpu/impl/internal.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,10 @@ typedef struct atl_kernel_info_s {
9797
uint64_t kernel_object;
9898
uint32_t group_segment_size;
9999
uint32_t private_segment_size;
100+
uint32_t sgpr_count;
101+
uint32_t vgpr_count;
102+
uint32_t sgpr_spill_count;
103+
uint32_t vgpr_spill_count;
100104
uint32_t kernel_segment_size;
101105
uint32_t num_args;
102106
std::vector<uint64_t> arg_alignments;

openmp/libomptarget/plugins/amdgpu/impl/system.cpp

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -832,7 +832,31 @@ static hsa_status_t get_code_object_custom_metadata(void *binary,
832832
msgpack_errors += map_lookup_string(element, ".symbol", &symbolName);
833833
msgpackErrorCheck(strings lookup in kernel metadata, msgpack_errors);
834834

835-
atl_kernel_info_t info = {0, 0, 0, 0, 0, {}, {}, {}};
835+
atl_kernel_info_t info = {0, 0, 0, 0, 0, 0, 0, 0, 0, {}, {}, {}};
836+
837+
uint64_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
838+
msgpack_errors += map_lookup_uint64_t(element, ".sgpr_count", &sgpr_count);
839+
msgpackErrorCheck(sgpr count metadata lookup in kernel metadata,
840+
msgpack_errors);
841+
info.sgpr_count = sgpr_count;
842+
843+
msgpack_errors += map_lookup_uint64_t(element, ".vgpr_count", &vgpr_count);
844+
msgpackErrorCheck(vgpr count metadata lookup in kernel metadata,
845+
msgpack_errors);
846+
info.vgpr_count = vgpr_count;
847+
848+
msgpack_errors +=
849+
map_lookup_uint64_t(element, ".sgpr_spill_count", &sgpr_spill_count);
850+
msgpackErrorCheck(sgpr spill count metadata lookup in kernel metadata,
851+
msgpack_errors);
852+
info.sgpr_spill_count = sgpr_spill_count;
853+
854+
msgpack_errors +=
855+
map_lookup_uint64_t(element, ".vgpr_spill_count", &vgpr_spill_count);
856+
msgpackErrorCheck(vgpr spill count metadata lookup in kernel metadata,
857+
msgpack_errors);
858+
info.vgpr_spill_count = vgpr_spill_count;
859+
836860
size_t kernel_explicit_args_size = 0;
837861
uint64_t kernel_segment_size;
838862
msgpack_errors += map_lookup_uint64_t(element, ".kernarg_segment_size",

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1759,6 +1759,19 @@ int32_t __tgt_rtl_run_target_team_region_locked(
17591759

17601760
KernelTy *KernelInfo = (KernelTy *)tgt_entry_ptr;
17611761

1762+
std::string kernel_name = std::string(KernelInfo->Name);
1763+
uint32_t sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count;
1764+
1765+
{
1766+
assert(KernelInfoTable[device_id].find(kernel_name) !=
1767+
KernelInfoTable[device_id].end());
1768+
auto it = KernelInfoTable[device_id][kernel_name];
1769+
sgpr_count = it.sgpr_count;
1770+
vgpr_count = it.vgpr_count;
1771+
sgpr_spill_count = it.sgpr_spill_count;
1772+
vgpr_spill_count = it.vgpr_spill_count;
1773+
}
1774+
17621775
/*
17631776
* Set limit based on ThreadsPerGroup and GroupsPerDevice
17641777
*/
@@ -1780,10 +1793,12 @@ int32_t __tgt_rtl_run_target_team_region_locked(
17801793
bool traceToStdout = print_kernel_trace & (RTL_TO_STDOUT | RTL_TIMING);
17811794
fprintf(traceToStdout ? stdout : stderr,
17821795
"DEVID:%2d SGN:%1d ConstWGSize:%-4d args:%2d teamsXthrds:(%4dX%4d) "
1783-
"reqd:(%4dX%4d) n:%s\n",
1796+
"reqd:(%4dX%4d) sgpr_count:%u vgpr_count:%u sgpr_spill_count:%u "
1797+
"vgpr_spill_count:%u tripcount:%lu n:%s\n",
17841798
device_id, KernelInfo->ExecutionMode, KernelInfo->ConstWGSize,
17851799
arg_num, num_groups, threadsPerGroup, num_teams, thread_limit,
1786-
KernelInfo->Name);
1800+
sgpr_count, vgpr_count, sgpr_spill_count, vgpr_spill_count,
1801+
loop_tripcount, KernelInfo->Name);
17871802
}
17881803

17891804
// Run on the device.
@@ -1812,7 +1827,6 @@ int32_t __tgt_rtl_run_target_team_region_locked(
18121827
packet->reserved2 = 0; // atmi writes id_ here
18131828
packet->completion_signal = {0}; // may want a pool of signals
18141829

1815-
std::string kernel_name = std::string(KernelInfo->Name);
18161830
{
18171831
assert(KernelInfoTable[device_id].find(kernel_name) !=
18181832
KernelInfoTable[device_id].end());

0 commit comments

Comments
 (0)